/* mc_translate.c, revision f517634b4a879b7653efa40d60c62fa3419809ed */
1 2/*--------------------------------------------------------------------*/ 3/*--- Instrument IR to perform memory checking operations. ---*/ 4/*--- mc_translate.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of MemCheck, a heavyweight Valgrind tool for 9 detecting memory errors. 10 11 Copyright (C) 2000-2012 Julian Seward 12 jseward@acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_tool_basics.h" 33#include "pub_tool_poolalloc.h" // For mc_include.h 34#include "pub_tool_hashtable.h" // For mc_include.h 35#include "pub_tool_libcassert.h" 36#include "pub_tool_libcprint.h" 37#include "pub_tool_tooliface.h" 38#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry) 39#include "pub_tool_xarray.h" 40#include "pub_tool_mallocfree.h" 41#include "pub_tool_libcbase.h" 42 43#include "mc_include.h" 44 45 46/* FIXMEs JRS 2011-June-16. 47 48 Check the interpretation for vector narrowing and widening ops, 49 particularly the saturating ones. I suspect they are either overly 50 pessimistic and/or wrong. 51*/ 52 53/* This file implements the Memcheck instrumentation, and in 54 particular contains the core of its undefined value detection 55 machinery. For a comprehensive background of the terminology, 56 algorithms and rationale used herein, read: 57 58 Using Valgrind to detect undefined value errors with 59 bit-precision 60 61 Julian Seward and Nicholas Nethercote 62 63 2005 USENIX Annual Technical Conference (General Track), 64 Anaheim, CA, USA, April 10-15, 2005. 65 66 ---- 67 68 Here is as good a place as any to record exactly when V bits are and 69 should be checked, why, and what function is responsible. 70 71 72 Memcheck complains when an undefined value is used: 73 74 1. In the condition of a conditional branch. Because it could cause 75 incorrect control flow, and thus cause incorrect externally-visible 76 behaviour. [mc_translate.c:complainIfUndefined] 77 78 2. As an argument to a system call, or as the value that specifies 79 the system call number. Because it could cause an incorrect 80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read] 81 82 3. As the address in a load or store. Because it could cause an 83 incorrect value to be used later, which could cause externally-visible 84 behaviour (eg. via incorrect control flow or an incorrect system call 85 argument) [complainIfUndefined] 86 87 4. As the target address of a branch. Because it could cause incorrect 88 control flow. [complainIfUndefined] 89 90 5. As an argument to setenv, unsetenv, or putenv. Because it could put 91 an incorrect value into the external environment. 92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)] 93 94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn). 
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B-value, and
   so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process.
      */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_temps-1], which gives the current shadow for
   each original tmp, or IRTemp_INVALID if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.
This is needed because it is 276 necessary to give a new value to a shadow once it has been tested 277 for undefinedness, but unfortunately IR's SSA property disallows 278 this. Instead we must abandon the old shadow, allocate a new one 279 and use that instead. 280 281 This is the same as findShadowTmpV, except we don't bother to see 282 if a shadow temp already existed -- we simply allocate a new one 283 regardless. */ 284static void newShadowTmpV ( MCEnv* mce, IRTemp orig ) 285{ 286 TempMapEnt* ent; 287 /* VG_(indexXA) range-checks 'orig', hence no need to check 288 here. */ 289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 290 tl_assert(ent->kind == Orig); 291 if (1) { 292 IRTemp tmpV 293 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh ); 294 /* newTemp may cause mce->tmpMap to resize, hence previous results 295 from VG_(indexXA) are invalid. */ 296 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 297 tl_assert(ent->kind == Orig); 298 ent->shadowV = tmpV; 299 } 300} 301 302 303/*------------------------------------------------------------*/ 304/*--- IRAtoms -- a subset of IRExprs ---*/ 305/*------------------------------------------------------------*/ 306 307/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by 308 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat 309 input, most of this code deals in atoms. Usefully, a value atom 310 always has a V-value which is also an atom: constants are shadowed 311 by constants, and temps are shadowed by the corresponding shadow 312 temporary. */ 313 314typedef IRExpr IRAtom; 315 316/* (used for sanity checks only): is this an atom which looks 317 like it's from original code? */ 318static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 ) 319{ 320 if (a1->tag == Iex_Const) 321 return True; 322 if (a1->tag == Iex_RdTmp) { 323 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); 324 return ent->kind == Orig; 325 } 326 return False; 327} 328 329/* (used for sanity checks only): is this an atom which looks 330 like it's from shadow code? */ 331static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 ) 332{ 333 if (a1->tag == Iex_Const) 334 return True; 335 if (a1->tag == Iex_RdTmp) { 336 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); 337 return ent->kind == VSh || ent->kind == BSh; 338 } 339 return False; 340} 341 342/* (used for sanity checks only): check that both args are atoms and 343 are identically-kinded. */ 344static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 ) 345{ 346 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp) 347 return True; 348 if (a1->tag == Iex_Const && a2->tag == Iex_Const) 349 return True; 350 return False; 351} 352 353 354/*------------------------------------------------------------*/ 355/*--- Type management ---*/ 356/*------------------------------------------------------------*/ 357 358/* Shadow state is always accessed using integer types. This returns 359 an integer type with the same size (as per sizeofIRType) as the 360 given type. The only valid shadow types are Bit, I8, I16, I32, 361 I64, I128, V128, V256. 
*/ 362 363static IRType shadowTypeV ( IRType ty ) 364{ 365 switch (ty) { 366 case Ity_I1: 367 case Ity_I8: 368 case Ity_I16: 369 case Ity_I32: 370 case Ity_I64: 371 case Ity_I128: return ty; 372 case Ity_F32: return Ity_I32; 373 case Ity_D32: return Ity_I32; 374 case Ity_F64: return Ity_I64; 375 case Ity_D64: return Ity_I64; 376 case Ity_F128: return Ity_I128; 377 case Ity_D128: return Ity_I128; 378 case Ity_V128: return Ity_V128; 379 case Ity_V256: return Ity_V256; 380 default: ppIRType(ty); 381 VG_(tool_panic)("memcheck:shadowTypeV"); 382 } 383} 384 385/* Produce a 'defined' value of the given shadow type. Should only be 386 supplied shadow types (Bit/I8/I16/I32/UI64). */ 387static IRExpr* definedOfType ( IRType ty ) { 388 switch (ty) { 389 case Ity_I1: return IRExpr_Const(IRConst_U1(False)); 390 case Ity_I8: return IRExpr_Const(IRConst_U8(0)); 391 case Ity_I16: return IRExpr_Const(IRConst_U16(0)); 392 case Ity_I32: return IRExpr_Const(IRConst_U32(0)); 393 case Ity_I64: return IRExpr_Const(IRConst_U64(0)); 394 case Ity_I128: return i128_const_zero(); 395 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000)); 396 default: VG_(tool_panic)("memcheck:definedOfType"); 397 } 398} 399 400 401/*------------------------------------------------------------*/ 402/*--- Constructing IR fragments ---*/ 403/*------------------------------------------------------------*/ 404 405/* add stmt to a bb */ 406static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) { 407 if (mce->trace) { 408 VG_(printf)(" %c: ", cat); 409 ppIRStmt(st); 410 VG_(printf)("\n"); 411 } 412 addStmtToIRSB(mce->sb, st); 413} 414 415/* assign value to tmp */ 416static inline 417void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) { 418 stmt(cat, mce, IRStmt_WrTmp(tmp,expr)); 419} 420 421/* build various kinds of expressions */ 422#define triop(_op, _arg1, _arg2, _arg3) \ 423 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3)) 424#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2)) 425#define unop(_op, _arg) IRExpr_Unop((_op),(_arg)) 426#define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) 427#define mkU16(_n) IRExpr_Const(IRConst_U16(_n)) 428#define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) 429#define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) 430#define mkV128(_n) IRExpr_Const(IRConst_V128(_n)) 431#define mkexpr(_tmp) IRExpr_RdTmp((_tmp)) 432 433/* Bind the given expression to a new temporary, and return the 434 temporary. This effectively converts an arbitrary expression into 435 an atom. 436 437 'ty' is the type of 'e' and hence the type that the new temporary 438 needs to be. But passing it in is redundant, since we can deduce 439 the type merely by inspecting 'e'. So at least use that fact to 440 assert that the two types agree. */ 441static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e ) 442{ 443 TempKind k; 444 IRTemp t; 445 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e); 446 447 tl_assert(tyE == ty); /* so 'ty' is redundant (!) 
*/ 448 switch (cat) { 449 case 'V': k = VSh; break; 450 case 'B': k = BSh; break; 451 case 'C': k = Orig; break; 452 /* happens when we are making up new "orig" 453 expressions, for IRCAS handling */ 454 default: tl_assert(0); 455 } 456 t = newTemp(mce, ty, k); 457 assign(cat, mce, t, e); 458 return mkexpr(t); 459} 460 461 462/*------------------------------------------------------------*/ 463/*--- Helper functions for 128-bit ops ---*/ 464/*------------------------------------------------------------*/ 465 466static IRExpr *i128_const_zero(void) 467{ 468 IRAtom* z64 = IRExpr_Const(IRConst_U64(0)); 469 return binop(Iop_64HLto128, z64, z64); 470} 471 472/* There are no I128-bit loads and/or stores [as generated by any 473 current front ends]. So we do not need to worry about that in 474 expr2vbits_Load */ 475 476 477/*------------------------------------------------------------*/ 478/*--- Constructing definedness primitive ops ---*/ 479/*------------------------------------------------------------*/ 480 481/* --------- Defined-if-either-defined --------- */ 482 483static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 484 tl_assert(isShadowAtom(mce,a1)); 485 tl_assert(isShadowAtom(mce,a2)); 486 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2)); 487} 488 489static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 490 tl_assert(isShadowAtom(mce,a1)); 491 tl_assert(isShadowAtom(mce,a2)); 492 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2)); 493} 494 495static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 496 tl_assert(isShadowAtom(mce,a1)); 497 tl_assert(isShadowAtom(mce,a2)); 498 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2)); 499} 500 501static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 502 tl_assert(isShadowAtom(mce,a1)); 503 tl_assert(isShadowAtom(mce,a2)); 504 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2)); 505} 506 507static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 508 tl_assert(isShadowAtom(mce,a1)); 509 tl_assert(isShadowAtom(mce,a2)); 510 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2)); 511} 512 513static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 514 tl_assert(isShadowAtom(mce,a1)); 515 tl_assert(isShadowAtom(mce,a2)); 516 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2)); 517} 518 519/* --------- Undefined-if-either-undefined --------- */ 520 521static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 522 tl_assert(isShadowAtom(mce,a1)); 523 tl_assert(isShadowAtom(mce,a2)); 524 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2)); 525} 526 527static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 528 tl_assert(isShadowAtom(mce,a1)); 529 tl_assert(isShadowAtom(mce,a2)); 530 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2)); 531} 532 533static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 534 tl_assert(isShadowAtom(mce,a1)); 535 tl_assert(isShadowAtom(mce,a2)); 536 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2)); 537} 538 539static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 540 tl_assert(isShadowAtom(mce,a1)); 541 tl_assert(isShadowAtom(mce,a2)); 542 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2)); 543} 544 545static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 546 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6; 547 tl_assert(isShadowAtom(mce,a1)); 548 tl_assert(isShadowAtom(mce,a2)); 549 tmp1 = assignNew('V', mce, 
Ity_I64, unop(Iop_128to64, a1)); 550 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1)); 551 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2)); 552 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2)); 553 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3)); 554 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4)); 555 556 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5)); 557} 558 559static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 560 tl_assert(isShadowAtom(mce,a1)); 561 tl_assert(isShadowAtom(mce,a2)); 562 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2)); 563} 564 565static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 566 tl_assert(isShadowAtom(mce,a1)); 567 tl_assert(isShadowAtom(mce,a2)); 568 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2)); 569} 570 571static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) { 572 switch (vty) { 573 case Ity_I8: return mkUifU8(mce, a1, a2); 574 case Ity_I16: return mkUifU16(mce, a1, a2); 575 case Ity_I32: return mkUifU32(mce, a1, a2); 576 case Ity_I64: return mkUifU64(mce, a1, a2); 577 case Ity_I128: return mkUifU128(mce, a1, a2); 578 case Ity_V128: return mkUifUV128(mce, a1, a2); 579 default: 580 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n"); 581 VG_(tool_panic)("memcheck:mkUifU"); 582 } 583} 584 585/* --------- The Left-family of operations. --------- */ 586 587static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) { 588 tl_assert(isShadowAtom(mce,a1)); 589 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1)); 590} 591 592static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) { 593 tl_assert(isShadowAtom(mce,a1)); 594 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1)); 595} 596 597static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) { 598 tl_assert(isShadowAtom(mce,a1)); 599 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1)); 600} 601 602static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) { 603 tl_assert(isShadowAtom(mce,a1)); 604 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1)); 605} 606 607/* --------- 'Improvement' functions for AND/OR. --------- */ 608 609/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give 610 defined (0); all other -> undefined (1). 
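
   To make that concrete, here is the per-bit behaviour (illustrative
   table; 0 = defined, 1 = undefined):

      data  vbits  data|vbits
        0     0        0      the operand bit is a known 0, so the And
                              result bit is forced to 0 and may safely
                              be regarded as defined
        0     1        1      operand bit unknown -- no improvement
        1     0        1      a known 1 cannot force an And result bit
        1     1        1      no improvement

   The term is combined (via DifD, elsewhere in this file) with the
   plain UifU interpretation, so a defined zero in either operand makes
   the corresponding result bit defined even if the other operand's
   bit is undefined.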
611*/ 612static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 613{ 614 tl_assert(isOriginalAtom(mce, data)); 615 tl_assert(isShadowAtom(mce, vbits)); 616 tl_assert(sameKindedAtoms(data, vbits)); 617 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits)); 618} 619 620static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 621{ 622 tl_assert(isOriginalAtom(mce, data)); 623 tl_assert(isShadowAtom(mce, vbits)); 624 tl_assert(sameKindedAtoms(data, vbits)); 625 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits)); 626} 627 628static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 629{ 630 tl_assert(isOriginalAtom(mce, data)); 631 tl_assert(isShadowAtom(mce, vbits)); 632 tl_assert(sameKindedAtoms(data, vbits)); 633 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits)); 634} 635 636static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 637{ 638 tl_assert(isOriginalAtom(mce, data)); 639 tl_assert(isShadowAtom(mce, vbits)); 640 tl_assert(sameKindedAtoms(data, vbits)); 641 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits)); 642} 643 644static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 645{ 646 tl_assert(isOriginalAtom(mce, data)); 647 tl_assert(isShadowAtom(mce, vbits)); 648 tl_assert(sameKindedAtoms(data, vbits)); 649 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits)); 650} 651 652static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 653{ 654 tl_assert(isOriginalAtom(mce, data)); 655 tl_assert(isShadowAtom(mce, vbits)); 656 tl_assert(sameKindedAtoms(data, vbits)); 657 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits)); 658} 659 660/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give 661 defined (0); all other -> undefined (1). 
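
   Concretely (illustrative; 0 = defined, 1 = undefined): ~data|vbits
   is 0 only where the data bit is a defined 1.  A known 1 forces the
   Or result bit to 1 regardless of the other operand, so that result
   bit may be treated as defined; in every other case the term is 1
   and gives no improvement over the plain UifU interpretation.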
662*/ 663static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 664{ 665 tl_assert(isOriginalAtom(mce, data)); 666 tl_assert(isShadowAtom(mce, vbits)); 667 tl_assert(sameKindedAtoms(data, vbits)); 668 return assignNew( 669 'V', mce, Ity_I8, 670 binop(Iop_Or8, 671 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)), 672 vbits) ); 673} 674 675static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 676{ 677 tl_assert(isOriginalAtom(mce, data)); 678 tl_assert(isShadowAtom(mce, vbits)); 679 tl_assert(sameKindedAtoms(data, vbits)); 680 return assignNew( 681 'V', mce, Ity_I16, 682 binop(Iop_Or16, 683 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)), 684 vbits) ); 685} 686 687static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 688{ 689 tl_assert(isOriginalAtom(mce, data)); 690 tl_assert(isShadowAtom(mce, vbits)); 691 tl_assert(sameKindedAtoms(data, vbits)); 692 return assignNew( 693 'V', mce, Ity_I32, 694 binop(Iop_Or32, 695 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)), 696 vbits) ); 697} 698 699static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 700{ 701 tl_assert(isOriginalAtom(mce, data)); 702 tl_assert(isShadowAtom(mce, vbits)); 703 tl_assert(sameKindedAtoms(data, vbits)); 704 return assignNew( 705 'V', mce, Ity_I64, 706 binop(Iop_Or64, 707 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)), 708 vbits) ); 709} 710 711static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 712{ 713 tl_assert(isOriginalAtom(mce, data)); 714 tl_assert(isShadowAtom(mce, vbits)); 715 tl_assert(sameKindedAtoms(data, vbits)); 716 return assignNew( 717 'V', mce, Ity_V128, 718 binop(Iop_OrV128, 719 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)), 720 vbits) ); 721} 722 723static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 724{ 725 tl_assert(isOriginalAtom(mce, data)); 726 tl_assert(isShadowAtom(mce, vbits)); 727 tl_assert(sameKindedAtoms(data, vbits)); 728 return assignNew( 729 'V', mce, Ity_V256, 730 binop(Iop_OrV256, 731 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)), 732 vbits) ); 733} 734 735/* --------- Pessimising casts. --------- */ 736 737/* The function returns an expression of type DST_TY. If any of the VBITS 738 is undefined (value == 1) the resulting expression has all bits set to 739 1. Otherwise, all bits are 0. */ 740 741static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) 742{ 743 IRType src_ty; 744 IRAtom* tmp1; 745 746 /* Note, dst_ty is a shadow type, not an original type. */ 747 tl_assert(isShadowAtom(mce,vbits)); 748 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits); 749 750 /* Fast-track some common cases */ 751 if (src_ty == Ity_I32 && dst_ty == Ity_I32) 752 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 753 754 if (src_ty == Ity_I64 && dst_ty == Ity_I64) 755 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits)); 756 757 if (src_ty == Ity_I32 && dst_ty == Ity_I64) { 758 /* PCast the arg, then clone it. */ 759 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 760 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); 761 } 762 763 if (src_ty == Ity_I64 && dst_ty == Ity_I32) { 764 /* PCast the arg. This gives all 0s or all 1s. Then throw away 765 the top half. */ 766 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits)); 767 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp)); 768 } 769 770 /* Else do it the slow way .. 
*/ 771 /* First of all, collapse vbits down to a single bit. */ 772 tmp1 = NULL; 773 switch (src_ty) { 774 case Ity_I1: 775 tmp1 = vbits; 776 break; 777 case Ity_I8: 778 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits)); 779 break; 780 case Ity_I16: 781 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits)); 782 break; 783 case Ity_I32: 784 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits)); 785 break; 786 case Ity_I64: 787 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits)); 788 break; 789 case Ity_I128: { 790 /* Gah. Chop it in half, OR the halves together, and compare 791 that with zero. */ 792 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits)); 793 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits)); 794 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3)); 795 tmp1 = assignNew('V', mce, Ity_I1, 796 unop(Iop_CmpNEZ64, tmp4)); 797 break; 798 } 799 default: 800 ppIRType(src_ty); 801 VG_(tool_panic)("mkPCastTo(1)"); 802 } 803 tl_assert(tmp1); 804 /* Now widen up to the dst type. */ 805 switch (dst_ty) { 806 case Ity_I1: 807 return tmp1; 808 case Ity_I8: 809 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1)); 810 case Ity_I16: 811 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1)); 812 case Ity_I32: 813 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1)); 814 case Ity_I64: 815 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 816 case Ity_V128: 817 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 818 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1)); 819 return tmp1; 820 case Ity_I128: 821 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 822 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1)); 823 return tmp1; 824 default: 825 ppIRType(dst_ty); 826 VG_(tool_panic)("mkPCastTo(2)"); 827 } 828} 829 830/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */ 831/* 832 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and 833 PCasting to Ity_U1. However, sometimes it is necessary to be more 834 accurate. The insight is that the result is defined if two 835 corresponding bits can be found, one from each argument, so that 836 both bits are defined but are different -- that makes EQ say "No" 837 and NE say "Yes". Hence, we compute an improvement term and DifD 838 it onto the "normal" (UifU) result. 839 840 The result is: 841 842 PCastTo<1> ( 843 -- naive version 844 PCastTo<sz>( UifU<sz>(vxx, vyy) ) 845 846 `DifD<sz>` 847 848 -- improvement term 849 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) ) 850 ) 851 852 where 853 vec contains 0 (defined) bits where the corresponding arg bits 854 are defined but different, and 1 bits otherwise. 855 856 vec = Or<sz>( vxx, // 0 iff bit defined 857 vyy, // 0 iff bit defined 858 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different 859 ) 860 861 If any bit of vec is 0, the result is defined and so the 862 improvement term should produce 0...0, else it should produce 863 1...1. 864 865 Hence require for the improvement term: 866 867 if vec == 1...1 then 1...1 else 0...0 868 -> 869 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) 870 871 This was extensively re-analysed and checked on 6 July 05. 
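
   A small illustrative example (4-bit values, made up for this
   comment):

      xx  = 1010   vxx = 0011   (the two low bits of xx are undefined)
      yy  = 0110   vyy = 0000   (yy is fully defined)

   Bit 3 is defined in both args and differs, so the CmpEQ/CmpNE
   outcome is knowable despite the undefined low bits of xx:

      vec         = 0011 | 0000 | Not(1010 xor 0110)  = 0011
      improvement = PCast( CmpEQ(0011, 1111) )        = 0000
      naive       = PCast( UifU(0011, 0000) )         = 1111
      improved    = 1111 `DifD` 0000                  = 0000

   so the final PCastTo<1> yields 0 ("defined"), whereas the naive
   term alone would have reported the comparison result as undefined.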
872*/ 873static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, 874 IRType ty, 875 IRAtom* vxx, IRAtom* vyy, 876 IRAtom* xx, IRAtom* yy ) 877{ 878 IRAtom *naive, *vec, *improvement_term; 879 IRAtom *improved, *final_cast, *top; 880 IROp opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR; 881 882 tl_assert(isShadowAtom(mce,vxx)); 883 tl_assert(isShadowAtom(mce,vyy)); 884 tl_assert(isOriginalAtom(mce,xx)); 885 tl_assert(isOriginalAtom(mce,yy)); 886 tl_assert(sameKindedAtoms(vxx,xx)); 887 tl_assert(sameKindedAtoms(vyy,yy)); 888 889 switch (ty) { 890 case Ity_I16: 891 opOR = Iop_Or16; 892 opDIFD = Iop_And16; 893 opUIFU = Iop_Or16; 894 opNOT = Iop_Not16; 895 opXOR = Iop_Xor16; 896 opCMP = Iop_CmpEQ16; 897 top = mkU16(0xFFFF); 898 break; 899 case Ity_I32: 900 opOR = Iop_Or32; 901 opDIFD = Iop_And32; 902 opUIFU = Iop_Or32; 903 opNOT = Iop_Not32; 904 opXOR = Iop_Xor32; 905 opCMP = Iop_CmpEQ32; 906 top = mkU32(0xFFFFFFFF); 907 break; 908 case Ity_I64: 909 opOR = Iop_Or64; 910 opDIFD = Iop_And64; 911 opUIFU = Iop_Or64; 912 opNOT = Iop_Not64; 913 opXOR = Iop_Xor64; 914 opCMP = Iop_CmpEQ64; 915 top = mkU64(0xFFFFFFFFFFFFFFFFULL); 916 break; 917 default: 918 VG_(tool_panic)("expensiveCmpEQorNE"); 919 } 920 921 naive 922 = mkPCastTo(mce,ty, 923 assignNew('V', mce, ty, binop(opUIFU, vxx, vyy))); 924 925 vec 926 = assignNew( 927 'V', mce,ty, 928 binop( opOR, 929 assignNew('V', mce,ty, binop(opOR, vxx, vyy)), 930 assignNew( 931 'V', mce,ty, 932 unop( opNOT, 933 assignNew('V', mce,ty, binop(opXOR, xx, yy)))))); 934 935 improvement_term 936 = mkPCastTo( mce,ty, 937 assignNew('V', mce,Ity_I1, binop(opCMP, vec, top))); 938 939 improved 940 = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) ); 941 942 final_cast 943 = mkPCastTo( mce, Ity_I1, improved ); 944 945 return final_cast; 946} 947 948 949/* --------- Semi-accurate interpretation of CmpORD. --------- */ 950 951/* CmpORD32{S,U} does PowerPC-style 3-way comparisons: 952 953 CmpORD32S(x,y) = 1<<3 if x <s y 954 = 1<<2 if x >s y 955 = 1<<1 if x == y 956 957 and similarly the unsigned variant. The default interpretation is: 958 959 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#) 960 & (7<<1) 961 962 The "& (7<<1)" reflects the fact that all result bits except 3,2,1 963 are zero and therefore defined (viz, zero). 964 965 Also deal with a special case better: 966 967 CmpORD32S(x,0) 968 969 Here, bit 3 (LT) of the result is a copy of the top bit of x and 970 will be defined even if the rest of x isn't. In which case we do: 971 972 CmpORD32S#(x,x#,0,{impliedly 0}#) 973 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ# 974 | (x# >>u 31) << 3 -- LT# = x#[31] 975 976 Analogous handling for CmpORD64{S,U}. 977*/ 978static Bool isZeroU32 ( IRAtom* e ) 979{ 980 return 981 toBool( e->tag == Iex_Const 982 && e->Iex.Const.con->tag == Ico_U32 983 && e->Iex.Const.con->Ico.U32 == 0 ); 984} 985 986static Bool isZeroU64 ( IRAtom* e ) 987{ 988 return 989 toBool( e->tag == Iex_Const 990 && e->Iex.Const.con->tag == Ico_U64 991 && e->Iex.Const.con->Ico.U64 == 0 ); 992} 993 994static IRAtom* doCmpORD ( MCEnv* mce, 995 IROp cmp_op, 996 IRAtom* xxhash, IRAtom* yyhash, 997 IRAtom* xx, IRAtom* yy ) 998{ 999 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U; 1000 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S; 1001 IROp opOR = m64 ? Iop_Or64 : Iop_Or32; 1002 IROp opAND = m64 ? Iop_And64 : Iop_And32; 1003 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32; 1004 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32; 1005 IRType ty = m64 ? Ity_I64 : Ity_I32; 1006 Int width = m64 ? 
64 : 32; 1007 1008 Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32; 1009 1010 IRAtom* threeLeft1 = NULL; 1011 IRAtom* sevenLeft1 = NULL; 1012 1013 tl_assert(isShadowAtom(mce,xxhash)); 1014 tl_assert(isShadowAtom(mce,yyhash)); 1015 tl_assert(isOriginalAtom(mce,xx)); 1016 tl_assert(isOriginalAtom(mce,yy)); 1017 tl_assert(sameKindedAtoms(xxhash,xx)); 1018 tl_assert(sameKindedAtoms(yyhash,yy)); 1019 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U 1020 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U); 1021 1022 if (0) { 1023 ppIROp(cmp_op); VG_(printf)(" "); 1024 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n"); 1025 } 1026 1027 if (syned && isZero(yy)) { 1028 /* fancy interpretation */ 1029 /* if yy is zero, then it must be fully defined (zero#). */ 1030 tl_assert(isZero(yyhash)); 1031 threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1); 1032 return 1033 binop( 1034 opOR, 1035 assignNew( 1036 'V', mce,ty, 1037 binop( 1038 opAND, 1039 mkPCastTo(mce,ty, xxhash), 1040 threeLeft1 1041 )), 1042 assignNew( 1043 'V', mce,ty, 1044 binop( 1045 opSHL, 1046 assignNew( 1047 'V', mce,ty, 1048 binop(opSHR, xxhash, mkU8(width-1))), 1049 mkU8(3) 1050 )) 1051 ); 1052 } else { 1053 /* standard interpretation */ 1054 sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1); 1055 return 1056 binop( 1057 opAND, 1058 mkPCastTo( mce,ty, 1059 mkUifU(mce,ty, xxhash,yyhash)), 1060 sevenLeft1 1061 ); 1062 } 1063} 1064 1065 1066/*------------------------------------------------------------*/ 1067/*--- Emit a test and complaint if something is undefined. ---*/ 1068/*------------------------------------------------------------*/ 1069 1070static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */ 1071 1072 1073/* Set the annotations on a dirty helper to indicate that the stack 1074 pointer and instruction pointers might be read. This is the 1075 behaviour of all 'emit-a-complaint' style functions we might 1076 call. */ 1077 1078static void setHelperAnns ( MCEnv* mce, IRDirty* di ) { 1079 di->nFxState = 2; 1080 di->fxState[0].fx = Ifx_Read; 1081 di->fxState[0].offset = mce->layout->offset_SP; 1082 di->fxState[0].size = mce->layout->sizeof_SP; 1083 di->fxState[0].nRepeats = 0; 1084 di->fxState[0].repeatLen = 0; 1085 di->fxState[1].fx = Ifx_Read; 1086 di->fxState[1].offset = mce->layout->offset_IP; 1087 di->fxState[1].size = mce->layout->sizeof_IP; 1088 di->fxState[1].nRepeats = 0; 1089 di->fxState[1].repeatLen = 0; 1090} 1091 1092 1093/* Check the supplied **original** atom for undefinedness, and emit a 1094 complaint if so. Once that happens, mark it as defined. This is 1095 possible because the atom is either a tmp or literal. If it's a 1096 tmp, it will be shadowed by a tmp, and so we can set the shadow to 1097 be defined. In fact as mentioned above, we will have to allocate a 1098 new tmp to carry the new 'defined' shadow value, and update the 1099 original->tmp mapping accordingly; we cannot simply assign a new 1100 value to an existing shadow tmp as this breaks SSAness -- resulting 1101 in the post-instrumentation sanity checker spluttering in disapproval. 1102*/ 1103static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard ) 1104{ 1105 IRAtom* vatom; 1106 IRType ty; 1107 Int sz; 1108 IRDirty* di; 1109 IRAtom* cond; 1110 IRAtom* origin; 1111 void* fn; 1112 const HChar* nm; 1113 IRExpr** args; 1114 Int nargs; 1115 1116 // Don't do V bit tests if we're not reporting undefined value errors. 
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;

   /* If the complaint is to be issued under a guard condition, AND
      that guard condition. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));

      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the
            value that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}


/* Generate into bb suitable actions to shadow this PutI.  If the
   state slice is marked 'always defined', do nothing.  Otherwise,
   write the V bits corresponding to the stored data to the shadow
   area, using a cloned version of the PutI that refers to it.
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty      = descr->elemTy;
   IRType tyS     = shadowTypeV(ty);
   Int    arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area.
*/ 1433 IRRegArray* new_descr 1434 = mkIRRegArray( descr->base + mce->layout->total_sizeB, 1435 tyS, descr->nElems); 1436 return IRExpr_GetI( new_descr, ix, bias ); 1437 } 1438} 1439 1440 1441/*------------------------------------------------------------*/ 1442/*--- Generating approximations for unknown operations, ---*/ 1443/*--- using lazy-propagate semantics ---*/ 1444/*------------------------------------------------------------*/ 1445 1446/* Lazy propagation of undefinedness from two values, resulting in the 1447 specified shadow type. 1448*/ 1449static 1450IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 ) 1451{ 1452 IRAtom* at; 1453 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1454 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1455 tl_assert(isShadowAtom(mce,va1)); 1456 tl_assert(isShadowAtom(mce,va2)); 1457 1458 /* The general case is inefficient because PCast is an expensive 1459 operation. Here are some special cases which use PCast only 1460 once rather than twice. */ 1461 1462 /* I64 x I64 -> I64 */ 1463 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) { 1464 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n"); 1465 at = mkUifU(mce, Ity_I64, va1, va2); 1466 at = mkPCastTo(mce, Ity_I64, at); 1467 return at; 1468 } 1469 1470 /* I64 x I64 -> I32 */ 1471 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) { 1472 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n"); 1473 at = mkUifU(mce, Ity_I64, va1, va2); 1474 at = mkPCastTo(mce, Ity_I32, at); 1475 return at; 1476 } 1477 1478 if (0) { 1479 VG_(printf)("mkLazy2 "); 1480 ppIRType(t1); 1481 VG_(printf)("_"); 1482 ppIRType(t2); 1483 VG_(printf)("_"); 1484 ppIRType(finalVty); 1485 VG_(printf)("\n"); 1486 } 1487 1488 /* General case: force everything via 32-bit intermediaries. */ 1489 at = mkPCastTo(mce, Ity_I32, va1); 1490 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1491 at = mkPCastTo(mce, finalVty, at); 1492 return at; 1493} 1494 1495 1496/* 3-arg version of the above. */ 1497static 1498IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, 1499 IRAtom* va1, IRAtom* va2, IRAtom* va3 ) 1500{ 1501 IRAtom* at; 1502 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1503 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1504 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1505 tl_assert(isShadowAtom(mce,va1)); 1506 tl_assert(isShadowAtom(mce,va2)); 1507 tl_assert(isShadowAtom(mce,va3)); 1508 1509 /* The general case is inefficient because PCast is an expensive 1510 operation. Here are some special cases which use PCast only 1511 twice rather than three times. */ 1512 1513 /* I32 x I64 x I64 -> I64 */ 1514 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1515 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1516 && finalVty == Ity_I64) { 1517 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n"); 1518 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1519 mode indication which is fully defined, this should get 1520 folded out later. */ 1521 at = mkPCastTo(mce, Ity_I64, va1); 1522 /* Now fold in 2nd and 3rd args. */ 1523 at = mkUifU(mce, Ity_I64, at, va2); 1524 at = mkUifU(mce, Ity_I64, at, va3); 1525 /* and PCast once again. 
*/ 1526 at = mkPCastTo(mce, Ity_I64, at); 1527 return at; 1528 } 1529 1530 /* I32 x I64 x I64 -> I32 */ 1531 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1532 && finalVty == Ity_I32) { 1533 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n"); 1534 at = mkPCastTo(mce, Ity_I64, va1); 1535 at = mkUifU(mce, Ity_I64, at, va2); 1536 at = mkUifU(mce, Ity_I64, at, va3); 1537 at = mkPCastTo(mce, Ity_I32, at); 1538 return at; 1539 } 1540 1541 /* I32 x I32 x I32 -> I32 */ 1542 /* 32-bit FP idiom, as (eg) happens on ARM */ 1543 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 1544 && finalVty == Ity_I32) { 1545 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n"); 1546 at = va1; 1547 at = mkUifU(mce, Ity_I32, at, va2); 1548 at = mkUifU(mce, Ity_I32, at, va3); 1549 at = mkPCastTo(mce, Ity_I32, at); 1550 return at; 1551 } 1552 1553 /* I32 x I128 x I128 -> I128 */ 1554 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1555 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 1556 && finalVty == Ity_I128) { 1557 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n"); 1558 /* Widen 1st arg to I128. Since 1st arg is typically a rounding 1559 mode indication which is fully defined, this should get 1560 folded out later. */ 1561 at = mkPCastTo(mce, Ity_I128, va1); 1562 /* Now fold in 2nd and 3rd args. */ 1563 at = mkUifU(mce, Ity_I128, at, va2); 1564 at = mkUifU(mce, Ity_I128, at, va3); 1565 /* and PCast once again. */ 1566 at = mkPCastTo(mce, Ity_I128, at); 1567 return at; 1568 } 1569 if (1) { 1570 VG_(printf)("mkLazy3: "); 1571 ppIRType(t1); 1572 VG_(printf)(" x "); 1573 ppIRType(t2); 1574 VG_(printf)(" x "); 1575 ppIRType(t3); 1576 VG_(printf)(" -> "); 1577 ppIRType(finalVty); 1578 VG_(printf)("\n"); 1579 } 1580 1581 tl_assert(0); 1582 /* General case: force everything via 32-bit intermediaries. */ 1583 /* 1584 at = mkPCastTo(mce, Ity_I32, va1); 1585 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1586 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3)); 1587 at = mkPCastTo(mce, finalVty, at); 1588 return at; 1589 */ 1590} 1591 1592 1593/* 4-arg version of the above. */ 1594static 1595IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 1596 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 ) 1597{ 1598 IRAtom* at; 1599 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1600 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1601 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1602 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4); 1603 tl_assert(isShadowAtom(mce,va1)); 1604 tl_assert(isShadowAtom(mce,va2)); 1605 tl_assert(isShadowAtom(mce,va3)); 1606 tl_assert(isShadowAtom(mce,va4)); 1607 1608 /* The general case is inefficient because PCast is an expensive 1609 operation. Here are some special cases which use PCast only 1610 twice rather than three times. */ 1611 1612 /* I32 x I64 x I64 x I64 -> I64 */ 1613 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1614 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64 1615 && finalVty == Ity_I64) { 1616 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n"); 1617 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1618 mode indication which is fully defined, this should get 1619 folded out later. */ 1620 at = mkPCastTo(mce, Ity_I64, va1); 1621 /* Now fold in 2nd, 3rd, 4th args. */ 1622 at = mkUifU(mce, Ity_I64, at, va2); 1623 at = mkUifU(mce, Ity_I64, at, va3); 1624 at = mkUifU(mce, Ity_I64, at, va4); 1625 /* and PCast once again. 
*/ 1626 at = mkPCastTo(mce, Ity_I64, at); 1627 return at; 1628 } 1629 /* I32 x I32 x I32 x I32 -> I32 */ 1630 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1631 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32 1632 && finalVty == Ity_I32) { 1633 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n"); 1634 at = va1; 1635 /* Now fold in 2nd, 3rd, 4th args. */ 1636 at = mkUifU(mce, Ity_I32, at, va2); 1637 at = mkUifU(mce, Ity_I32, at, va3); 1638 at = mkUifU(mce, Ity_I32, at, va4); 1639 at = mkPCastTo(mce, Ity_I32, at); 1640 return at; 1641 } 1642 1643 if (1) { 1644 VG_(printf)("mkLazy4: "); 1645 ppIRType(t1); 1646 VG_(printf)(" x "); 1647 ppIRType(t2); 1648 VG_(printf)(" x "); 1649 ppIRType(t3); 1650 VG_(printf)(" x "); 1651 ppIRType(t4); 1652 VG_(printf)(" -> "); 1653 ppIRType(finalVty); 1654 VG_(printf)("\n"); 1655 } 1656 1657 tl_assert(0); 1658} 1659 1660 1661/* Do the lazy propagation game from a null-terminated vector of 1662 atoms. This is presumably the arguments to a helper call, so the 1663 IRCallee info is also supplied in order that we can know which 1664 arguments should be ignored (via the .mcx_mask field). 1665*/ 1666static 1667IRAtom* mkLazyN ( MCEnv* mce, 1668 IRAtom** exprvec, IRType finalVtype, IRCallee* cee ) 1669{ 1670 Int i; 1671 IRAtom* here; 1672 IRAtom* curr; 1673 IRType mergeTy; 1674 Bool mergeTy64 = True; 1675 1676 /* Decide on the type of the merge intermediary. If all relevant 1677 args are I64, then it's I64. In all other circumstances, use 1678 I32. */ 1679 for (i = 0; exprvec[i]; i++) { 1680 tl_assert(i < 32); 1681 tl_assert(isOriginalAtom(mce, exprvec[i])); 1682 if (cee->mcx_mask & (1<<i)) 1683 continue; 1684 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64) 1685 mergeTy64 = False; 1686 } 1687 1688 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32; 1689 curr = definedOfType(mergeTy); 1690 1691 for (i = 0; exprvec[i]; i++) { 1692 tl_assert(i < 32); 1693 tl_assert(isOriginalAtom(mce, exprvec[i])); 1694 /* Only take notice of this arg if the callee's mc-exclusion 1695 mask does not say it is to be excluded. */ 1696 if (cee->mcx_mask & (1<<i)) { 1697 /* the arg is to be excluded from definedness checking. Do 1698 nothing. */ 1699 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i); 1700 } else { 1701 /* calculate the arg's definedness, and pessimistically merge 1702 it in. */ 1703 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) ); 1704 curr = mergeTy64 1705 ? mkUifU64(mce, here, curr) 1706 : mkUifU32(mce, here, curr); 1707 } 1708 } 1709 return mkPCastTo(mce, finalVtype, curr ); 1710} 1711 1712 1713/*------------------------------------------------------------*/ 1714/*--- Generating expensive sequences for exact carry-chain ---*/ 1715/*--- propagation in add/sub and related operations. 
---*/ 1716/*------------------------------------------------------------*/ 1717 1718static 1719IRAtom* expensiveAddSub ( MCEnv* mce, 1720 Bool add, 1721 IRType ty, 1722 IRAtom* qaa, IRAtom* qbb, 1723 IRAtom* aa, IRAtom* bb ) 1724{ 1725 IRAtom *a_min, *b_min, *a_max, *b_max; 1726 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB; 1727 1728 tl_assert(isShadowAtom(mce,qaa)); 1729 tl_assert(isShadowAtom(mce,qbb)); 1730 tl_assert(isOriginalAtom(mce,aa)); 1731 tl_assert(isOriginalAtom(mce,bb)); 1732 tl_assert(sameKindedAtoms(qaa,aa)); 1733 tl_assert(sameKindedAtoms(qbb,bb)); 1734 1735 switch (ty) { 1736 case Ity_I32: 1737 opAND = Iop_And32; 1738 opOR = Iop_Or32; 1739 opXOR = Iop_Xor32; 1740 opNOT = Iop_Not32; 1741 opADD = Iop_Add32; 1742 opSUB = Iop_Sub32; 1743 break; 1744 case Ity_I64: 1745 opAND = Iop_And64; 1746 opOR = Iop_Or64; 1747 opXOR = Iop_Xor64; 1748 opNOT = Iop_Not64; 1749 opADD = Iop_Add64; 1750 opSUB = Iop_Sub64; 1751 break; 1752 default: 1753 VG_(tool_panic)("expensiveAddSub"); 1754 } 1755 1756 // a_min = aa & ~qaa 1757 a_min = assignNew('V', mce,ty, 1758 binop(opAND, aa, 1759 assignNew('V', mce,ty, unop(opNOT, qaa)))); 1760 1761 // b_min = bb & ~qbb 1762 b_min = assignNew('V', mce,ty, 1763 binop(opAND, bb, 1764 assignNew('V', mce,ty, unop(opNOT, qbb)))); 1765 1766 // a_max = aa | qaa 1767 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa)); 1768 1769 // b_max = bb | qbb 1770 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb)); 1771 1772 if (add) { 1773 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max)) 1774 return 1775 assignNew('V', mce,ty, 1776 binop( opOR, 1777 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1778 assignNew('V', mce,ty, 1779 binop( opXOR, 1780 assignNew('V', mce,ty, binop(opADD, a_min, b_min)), 1781 assignNew('V', mce,ty, binop(opADD, a_max, b_max)) 1782 ) 1783 ) 1784 ) 1785 ); 1786 } else { 1787 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min)) 1788 return 1789 assignNew('V', mce,ty, 1790 binop( opOR, 1791 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1792 assignNew('V', mce,ty, 1793 binop( opXOR, 1794 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)), 1795 assignNew('V', mce,ty, binop(opSUB, a_max, b_min)) 1796 ) 1797 ) 1798 ) 1799 ); 1800 } 1801 1802} 1803 1804 1805static 1806IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop, 1807 IRAtom* atom, IRAtom* vatom ) 1808{ 1809 IRType ty; 1810 IROp xorOp, subOp, andOp; 1811 IRExpr *one; 1812 IRAtom *improver, *improved; 1813 tl_assert(isShadowAtom(mce,vatom)); 1814 tl_assert(isOriginalAtom(mce,atom)); 1815 tl_assert(sameKindedAtoms(atom,vatom)); 1816 1817 switch (czop) { 1818 case Iop_Ctz32: 1819 ty = Ity_I32; 1820 xorOp = Iop_Xor32; 1821 subOp = Iop_Sub32; 1822 andOp = Iop_And32; 1823 one = mkU32(1); 1824 break; 1825 case Iop_Ctz64: 1826 ty = Ity_I64; 1827 xorOp = Iop_Xor64; 1828 subOp = Iop_Sub64; 1829 andOp = Iop_And64; 1830 one = mkU64(1); 1831 break; 1832 default: 1833 ppIROp(czop); 1834 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes"); 1835 } 1836 1837 // improver = atom ^ (atom - 1) 1838 // 1839 // That is, improver has its low ctz(atom) bits equal to one; 1840 // higher bits (if any) equal to zero. 1841 improver = assignNew('V', mce,ty, 1842 binop(xorOp, 1843 atom, 1844 assignNew('V', mce, ty, 1845 binop(subOp, atom, one)))); 1846 1847 // improved = vatom & improver 1848 // 1849 // That is, treat any V bits above the first ctz(atom) bits as 1850 // "defined". 
1851 improved = assignNew('V', mce, ty, 1852 binop(andOp, vatom, improver)); 1853 1854 // Return pessimizing cast of improved. 1855 return mkPCastTo(mce, ty, improved); 1856} 1857 1858 1859/*------------------------------------------------------------*/ 1860/*--- Scalar shifts. ---*/ 1861/*------------------------------------------------------------*/ 1862 1863/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic 1864 idea is to shift the definedness bits by the original shift amount. 1865 This introduces 0s ("defined") in new positions for left shifts and 1866 unsigned right shifts, and copies the top definedness bit for 1867 signed right shifts. So, conveniently, applying the original shift 1868 operator to the definedness bits for the left arg is exactly the 1869 right thing to do: 1870 1871 (qaa << bb) 1872 1873 However, if the shift amount is undefined then the whole result 1874 is undefined. Hence we need: 1875 1876 (qaa << bb) `UifU` PCast(qbb) 1877 1878 If the shift amount bb is a literal then qbb will say 'all defined' 1879 and the UifU and PCast will get folded out by post-instrumentation 1880 optimisation. 1881*/ 1882static IRAtom* scalarShift ( MCEnv* mce, 1883 IRType ty, 1884 IROp original_op, 1885 IRAtom* qaa, IRAtom* qbb, 1886 IRAtom* aa, IRAtom* bb ) 1887{ 1888 tl_assert(isShadowAtom(mce,qaa)); 1889 tl_assert(isShadowAtom(mce,qbb)); 1890 tl_assert(isOriginalAtom(mce,aa)); 1891 tl_assert(isOriginalAtom(mce,bb)); 1892 tl_assert(sameKindedAtoms(qaa,aa)); 1893 tl_assert(sameKindedAtoms(qbb,bb)); 1894 return 1895 assignNew( 1896 'V', mce, ty, 1897 mkUifU( mce, ty, 1898 assignNew('V', mce, ty, binop(original_op, qaa, bb)), 1899 mkPCastTo(mce, ty, qbb) 1900 ) 1901 ); 1902} 1903 1904 1905/*------------------------------------------------------------*/ 1906/*--- Helpers for dealing with vector primops. ---*/ 1907/*------------------------------------------------------------*/ 1908 1909/* Vector pessimisation -- pessimise within each lane individually.
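   Each helper simply applies the corresponding CmpNEZ op to the V bits, so a lane of the result is all zeroes (defined) when the matching shadow lane is entirely zero, and all ones (undefined) otherwise.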
*/ 1910 1911 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at ) 1912 { 1913 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at)); 1914 } 1915 1916 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at ) 1917 { 1918 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at)); 1919 } 1920 1921 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at ) 1922 { 1923 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at)); 1924 } 1925 1926 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) 1927 { 1928 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); 1929 } 1930 1931 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at ) 1932 { 1933 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at)); 1934 } 1935 1936 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at ) 1937 { 1938 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at)); 1939 } 1940 1941 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at ) 1942 { 1943 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at)); 1944 } 1945 1946 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at ) 1947 { 1948 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at)); 1949 } 1950 1951 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at ) 1952 { 1953 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at)); 1954 } 1955 1956 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at ) 1957 { 1958 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at)); 1959 } 1960 1961 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at ) 1962 { 1963 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at)); 1964 } 1965 1966 1967 /* Here's a simple scheme capable of handling ops derived from SSE1 1968 code while only generating ops that can be efficiently 1969 implemented in SSE1. */ 1970 1971 /* All-lanes versions are straightforward: 1972 1973 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#)) 1974 1975 unary32Fx4(x) ==> PCast32x4(x#) 1976 1977 Lowest-lane-only versions are more complex: 1978 1979 binary32F0x4(x,y) ==> SetV128lo32( 1980 x#, 1981 PCast32(V128to32(UifUV128(x#,y#))) 1982 ) 1983 1984 This is perhaps not so obvious. In particular, it's faster to 1985 do a V128-bit UifU and then take the bottom 32 bits than the more 1986 obvious scheme of taking the bottom 32 bits of each operand 1987 and doing a 32-bit UifU. Basically this is because UifU is fast and 1988 chopping lanes off vector values is slow.
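   (Note also that SetV128lo32 leaves the V bits of the upper three lanes of x# untouched, which matches the original operation's behaviour of passing those lanes through from its first operand.)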
1989 1990 Finally: 1991 1992 unary32F0x4(x) ==> SetV128lo32( 1993 x#, 1994 PCast32(V128to32(x#)) 1995 ) 1996 1997 Where: 1998 1999 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 2000 PCast32x4(v#) = CmpNEZ32x4(v#) 2001*/ 2002 2003static 2004IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2005{ 2006 IRAtom* at; 2007 tl_assert(isShadowAtom(mce, vatomX)); 2008 tl_assert(isShadowAtom(mce, vatomY)); 2009 at = mkUifUV128(mce, vatomX, vatomY); 2010 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 2011 return at; 2012} 2013 2014static 2015IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2016{ 2017 IRAtom* at; 2018 tl_assert(isShadowAtom(mce, vatomX)); 2019 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 2020 return at; 2021} 2022 2023static 2024IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2025{ 2026 IRAtom* at; 2027 tl_assert(isShadowAtom(mce, vatomX)); 2028 tl_assert(isShadowAtom(mce, vatomY)); 2029 at = mkUifUV128(mce, vatomX, vatomY); 2030 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 2031 at = mkPCastTo(mce, Ity_I32, at); 2032 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2033 return at; 2034} 2035 2036static 2037IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 2038{ 2039 IRAtom* at; 2040 tl_assert(isShadowAtom(mce, vatomX)); 2041 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 2042 at = mkPCastTo(mce, Ity_I32, at); 2043 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2044 return at; 2045} 2046 2047/* --- ... and ... 64Fx2 versions of the same ... --- */ 2048 2049static 2050IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2051{ 2052 IRAtom* at; 2053 tl_assert(isShadowAtom(mce, vatomX)); 2054 tl_assert(isShadowAtom(mce, vatomY)); 2055 at = mkUifUV128(mce, vatomX, vatomY); 2056 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 2057 return at; 2058} 2059 2060static 2061IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2062{ 2063 IRAtom* at; 2064 tl_assert(isShadowAtom(mce, vatomX)); 2065 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 2066 return at; 2067} 2068 2069static 2070IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2071{ 2072 IRAtom* at; 2073 tl_assert(isShadowAtom(mce, vatomX)); 2074 tl_assert(isShadowAtom(mce, vatomY)); 2075 at = mkUifUV128(mce, vatomX, vatomY); 2076 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 2077 at = mkPCastTo(mce, Ity_I64, at); 2078 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2079 return at; 2080} 2081 2082static 2083IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 2084{ 2085 IRAtom* at; 2086 tl_assert(isShadowAtom(mce, vatomX)); 2087 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 2088 at = mkPCastTo(mce, Ity_I64, at); 2089 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2090 return at; 2091} 2092 2093/* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 2094 2095static 2096IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2097{ 2098 IRAtom* at; 2099 tl_assert(isShadowAtom(mce, vatomX)); 2100 tl_assert(isShadowAtom(mce, vatomY)); 2101 at = mkUifU64(mce, vatomX, vatomY); 2102 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 2103 return at; 2104} 2105 2106static 2107IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2108{ 2109 IRAtom* at; 2110 tl_assert(isShadowAtom(mce, vatomX)); 2111 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 2112 return at; 2113} 2114 2115/* --- ... and ... 64Fx4 versions of the same ... --- */ 2116 2117static 2118IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2119{ 2120 IRAtom* at; 2121 tl_assert(isShadowAtom(mce, vatomX)); 2122 tl_assert(isShadowAtom(mce, vatomY)); 2123 at = mkUifUV256(mce, vatomX, vatomY); 2124 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at)); 2125 return at; 2126} 2127 2128static 2129IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2130{ 2131 IRAtom* at; 2132 tl_assert(isShadowAtom(mce, vatomX)); 2133 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX)); 2134 return at; 2135} 2136 2137/* --- ... and ... 32Fx8 versions of the same ... --- */ 2138 2139static 2140IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2141{ 2142 IRAtom* at; 2143 tl_assert(isShadowAtom(mce, vatomX)); 2144 tl_assert(isShadowAtom(mce, vatomY)); 2145 at = mkUifUV256(mce, vatomX, vatomY); 2146 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at)); 2147 return at; 2148} 2149 2150static 2151IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) 2152{ 2153 IRAtom* at; 2154 tl_assert(isShadowAtom(mce, vatomX)); 2155 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX)); 2156 return at; 2157} 2158 2159/* --- --- Vector saturated narrowing --- --- */ 2160 2161/* We used to do something very clever here, but on closer inspection 2162 (2011-Jun-15), and in particular bug #279698, it turns out to be 2163 wrong. Part of the problem came from the fact that for a long 2164 time, the IR primops to do with saturated narrowing were 2165 underspecified and managed to confuse multiple cases which needed 2166 to be separate: the op names had a signedness qualifier, but in 2167 fact the source and destination signednesses needed to be specified 2168 independently, so the op names really need two independent 2169 signedness specifiers. 2170 2171 As of 2011-Jun-15 (ish) the underspecification was sorted out 2172 properly. The incorrect instrumentation remained, though. That 2173 has now (2011-Oct-22) been fixed. 2174 2175 What we now do is simple: 2176 2177 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 2178 number of lanes, X is the source lane width and signedness, and Y 2179 is the destination lane width and signedness. In all cases the 2180 destination lane width is half the source lane width, so the names 2181 have a bit of redundancy, but are at least easy to read. 2182 2183 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 2184 to unsigned 16s. 2185 2186 Let Vanilla(OP) be a function that takes OP, one of these 2187 saturating narrowing ops, and produces the same "shaped" narrowing 2188 op which is not saturating, but merely dumps the most significant 2189 bits. "same shape" means that the lane numbers and widths are the 2190 same as with OP. 
2191 2192 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 2193 = Iop_NarrowBin32to16x8, 2194 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 2195 dumping the top half of each lane. 2196 2197 So, with that in place, the scheme is simple, and it is simple to 2198 pessimise each lane individually and then apply Vanilla(OP) so as 2199 to get the result in the right "shape". If the original OP is 2200 QNarrowBinXtoYxZ then we produce 2201 2202 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) ) 2203 2204 or for the case when OP is unary (Iop_QNarrowUn*) 2205 2206 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) ) 2207*/ 2208static 2209IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp ) 2210{ 2211 switch (qnarrowOp) { 2212 /* Binary: (128, 128) -> 128 */ 2213 case Iop_QNarrowBin16Sto8Ux16: 2214 case Iop_QNarrowBin16Sto8Sx16: 2215 case Iop_QNarrowBin16Uto8Ux16: 2216 return Iop_NarrowBin16to8x16; 2217 case Iop_QNarrowBin32Sto16Ux8: 2218 case Iop_QNarrowBin32Sto16Sx8: 2219 case Iop_QNarrowBin32Uto16Ux8: 2220 return Iop_NarrowBin32to16x8; 2221 /* Binary: (64, 64) -> 64 */ 2222 case Iop_QNarrowBin32Sto16Sx4: 2223 return Iop_NarrowBin32to16x4; 2224 case Iop_QNarrowBin16Sto8Ux8: 2225 case Iop_QNarrowBin16Sto8Sx8: 2226 return Iop_NarrowBin16to8x8; 2227 /* Unary: 128 -> 64 */ 2228 case Iop_QNarrowUn64Uto32Ux2: 2229 case Iop_QNarrowUn64Sto32Sx2: 2230 case Iop_QNarrowUn64Sto32Ux2: 2231 return Iop_NarrowUn64to32x2; 2232 case Iop_QNarrowUn32Uto16Ux4: 2233 case Iop_QNarrowUn32Sto16Sx4: 2234 case Iop_QNarrowUn32Sto16Ux4: 2235 return Iop_NarrowUn32to16x4; 2236 case Iop_QNarrowUn16Uto8Ux8: 2237 case Iop_QNarrowUn16Sto8Sx8: 2238 case Iop_QNarrowUn16Sto8Ux8: 2239 return Iop_NarrowUn16to8x8; 2240 default: 2241 ppIROp(qnarrowOp); 2242 VG_(tool_panic)("vanillaNarrowOpOfShape"); 2243 } 2244} 2245 2246static 2247IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op, 2248 IRAtom* vatom1, IRAtom* vatom2) 2249{ 2250 IRAtom *at1, *at2, *at3; 2251 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2252 switch (narrow_op) { 2253 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break; 2254 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break; 2255 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break; 2256 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break; 2257 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break; 2258 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break; 2259 default: VG_(tool_panic)("vectorNarrowBinV128"); 2260 } 2261 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2262 tl_assert(isShadowAtom(mce,vatom1)); 2263 tl_assert(isShadowAtom(mce,vatom2)); 2264 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2265 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 2266 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2)); 2267 return at3; 2268} 2269 2270static 2271IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op, 2272 IRAtom* vatom1, IRAtom* vatom2) 2273{ 2274 IRAtom *at1, *at2, *at3; 2275 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2276 switch (narrow_op) { 2277 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break; 2278 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break; 2279 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break; 2280 default: VG_(tool_panic)("vectorNarrowBin64"); 2281 } 2282 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2283 tl_assert(isShadowAtom(mce,vatom1)); 2284 tl_assert(isShadowAtom(mce,vatom2)); 2285 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 2286 at2 = assignNew('V', mce, 
Ity_I64, pcast(mce, vatom2)); 2287 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2)); 2288 return at3; 2289} 2290 2291static 2292IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op, 2293 IRAtom* vatom1) 2294{ 2295 IRAtom *at1, *at2; 2296 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2297 tl_assert(isShadowAtom(mce,vatom1)); 2298 /* For vanilla narrowing (non-saturating), we can just apply 2299 the op directly to the V bits. */ 2300 switch (narrow_op) { 2301 case Iop_NarrowUn16to8x8: 2302 case Iop_NarrowUn32to16x4: 2303 case Iop_NarrowUn64to32x2: 2304 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2305 return at1; 2306 default: 2307 break; /* Do Plan B */ 2308 } 2309 /* Plan B: for ops that involve a saturation operation on the args, 2310 we must PCast before the vanilla narrow. */ 2311 switch (narrow_op) { 2312 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2313 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2314 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2315 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2316 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2317 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2318 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2319 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2320 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2321 default: VG_(tool_panic)("vectorNarrowUnV128"); 2322 } 2323 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2324 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2325 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2326 return at2; 2327} 2328 2329static 2330IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2331 IRAtom* vatom1) 2332{ 2333 IRAtom *at1, *at2; 2334 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2335 switch (longen_op) { 2336 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2337 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2338 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2339 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2340 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2341 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2342 default: VG_(tool_panic)("vectorWidenI64"); 2343 } 2344 tl_assert(isShadowAtom(mce,vatom1)); 2345 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2346 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2347 return at2; 2348} 2349 2350 2351/* --- --- Vector integer arithmetic --- --- */ 2352 2353/* Simple ... UifU the args and per-lane pessimise the results. 
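   For example, binary32Ix4(x#,y#) is PCast32x4(UifUV128(x#,y#)): a result lane is marked wholly undefined if the corresponding lane of either argument contains any undefined bit, and wholly defined otherwise.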
*/ 2354 2355/* --- V128-bit versions --- */ 2356 2357static 2358IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2359{ 2360 IRAtom* at; 2361 at = mkUifUV128(mce, vatom1, vatom2); 2362 at = mkPCast8x16(mce, at); 2363 return at; 2364} 2365 2366static 2367IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2368{ 2369 IRAtom* at; 2370 at = mkUifUV128(mce, vatom1, vatom2); 2371 at = mkPCast16x8(mce, at); 2372 return at; 2373} 2374 2375static 2376IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2377{ 2378 IRAtom* at; 2379 at = mkUifUV128(mce, vatom1, vatom2); 2380 at = mkPCast32x4(mce, at); 2381 return at; 2382} 2383 2384static 2385IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2386{ 2387 IRAtom* at; 2388 at = mkUifUV128(mce, vatom1, vatom2); 2389 at = mkPCast64x2(mce, at); 2390 return at; 2391} 2392 2393/* --- 64-bit versions --- */ 2394 2395static 2396IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2397{ 2398 IRAtom* at; 2399 at = mkUifU64(mce, vatom1, vatom2); 2400 at = mkPCast8x8(mce, at); 2401 return at; 2402} 2403 2404static 2405IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2406{ 2407 IRAtom* at; 2408 at = mkUifU64(mce, vatom1, vatom2); 2409 at = mkPCast16x4(mce, at); 2410 return at; 2411} 2412 2413static 2414IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2415{ 2416 IRAtom* at; 2417 at = mkUifU64(mce, vatom1, vatom2); 2418 at = mkPCast32x2(mce, at); 2419 return at; 2420} 2421 2422static 2423IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2424{ 2425 IRAtom* at; 2426 at = mkUifU64(mce, vatom1, vatom2); 2427 at = mkPCastTo(mce, Ity_I64, at); 2428 return at; 2429} 2430 2431/* --- 32-bit versions --- */ 2432 2433static 2434IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2435{ 2436 IRAtom* at; 2437 at = mkUifU32(mce, vatom1, vatom2); 2438 at = mkPCast8x4(mce, at); 2439 return at; 2440} 2441 2442static 2443IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2444{ 2445 IRAtom* at; 2446 at = mkUifU32(mce, vatom1, vatom2); 2447 at = mkPCast16x2(mce, at); 2448 return at; 2449} 2450 2451 2452/*------------------------------------------------------------*/ 2453/*--- Generate shadow values from all kinds of IRExprs. 
---*/ 2454/*------------------------------------------------------------*/ 2455 2456static 2457IRAtom* expr2vbits_Qop ( MCEnv* mce, 2458 IROp op, 2459 IRAtom* atom1, IRAtom* atom2, 2460 IRAtom* atom3, IRAtom* atom4 ) 2461{ 2462 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2463 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2464 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2465 IRAtom* vatom4 = expr2vbits( mce, atom4 ); 2466 2467 tl_assert(isOriginalAtom(mce,atom1)); 2468 tl_assert(isOriginalAtom(mce,atom2)); 2469 tl_assert(isOriginalAtom(mce,atom3)); 2470 tl_assert(isOriginalAtom(mce,atom4)); 2471 tl_assert(isShadowAtom(mce,vatom1)); 2472 tl_assert(isShadowAtom(mce,vatom2)); 2473 tl_assert(isShadowAtom(mce,vatom3)); 2474 tl_assert(isShadowAtom(mce,vatom4)); 2475 tl_assert(sameKindedAtoms(atom1,vatom1)); 2476 tl_assert(sameKindedAtoms(atom2,vatom2)); 2477 tl_assert(sameKindedAtoms(atom3,vatom3)); 2478 tl_assert(sameKindedAtoms(atom4,vatom4)); 2479 switch (op) { 2480 case Iop_MAddF64: 2481 case Iop_MAddF64r32: 2482 case Iop_MSubF64: 2483 case Iop_MSubF64r32: 2484 /* I32(rm) x F64 x F64 x F64 -> F64 */ 2485 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4); 2486 2487 case Iop_MAddF32: 2488 case Iop_MSubF32: 2489 /* I32(rm) x F32 x F32 x F32 -> F32 */ 2490 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4); 2491 2492 /* V256-bit data-steering */ 2493 case Iop_64x4toV256: 2494 return assignNew('V', mce, Ity_V256, 2495 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4)); 2496 2497 default: 2498 ppIROp(op); 2499 VG_(tool_panic)("memcheck:expr2vbits_Qop"); 2500 } 2501} 2502 2503 2504static 2505IRAtom* expr2vbits_Triop ( MCEnv* mce, 2506 IROp op, 2507 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) 2508{ 2509 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2510 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2511 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2512 2513 tl_assert(isOriginalAtom(mce,atom1)); 2514 tl_assert(isOriginalAtom(mce,atom2)); 2515 tl_assert(isOriginalAtom(mce,atom3)); 2516 tl_assert(isShadowAtom(mce,vatom1)); 2517 tl_assert(isShadowAtom(mce,vatom2)); 2518 tl_assert(isShadowAtom(mce,vatom3)); 2519 tl_assert(sameKindedAtoms(atom1,vatom1)); 2520 tl_assert(sameKindedAtoms(atom2,vatom2)); 2521 tl_assert(sameKindedAtoms(atom3,vatom3)); 2522 switch (op) { 2523 case Iop_AddF128: 2524 case Iop_AddD128: 2525 case Iop_SubF128: 2526 case Iop_SubD128: 2527 case Iop_MulF128: 2528 case Iop_MulD128: 2529 case Iop_DivF128: 2530 case Iop_DivD128: 2531 case Iop_QuantizeD128: 2532 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */ 2533 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2534 case Iop_AddF64: 2535 case Iop_AddD64: 2536 case Iop_AddF64r32: 2537 case Iop_SubF64: 2538 case Iop_SubD64: 2539 case Iop_SubF64r32: 2540 case Iop_MulF64: 2541 case Iop_MulD64: 2542 case Iop_MulF64r32: 2543 case Iop_DivF64: 2544 case Iop_DivD64: 2545 case Iop_DivF64r32: 2546 case Iop_ScaleF64: 2547 case Iop_Yl2xF64: 2548 case Iop_Yl2xp1F64: 2549 case Iop_AtanF64: 2550 case Iop_PRemF64: 2551 case Iop_PRem1F64: 2552 case Iop_QuantizeD64: 2553 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */ 2554 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2555 case Iop_PRemC3210F64: 2556 case Iop_PRem1C3210F64: 2557 /* I32(rm) x F64 x F64 -> I32 */ 2558 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2559 case Iop_AddF32: 2560 case Iop_SubF32: 2561 case Iop_MulF32: 2562 case Iop_DivF32: 2563 /* I32(rm) x F32 x F32 -> I32 */ 2564 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2565 case Iop_SignificanceRoundD64: 
2566 /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */ 2567 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2568 case Iop_SignificanceRoundD128: 2569 /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */ 2570 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2571 case Iop_ExtractV128: 2572 complainIfUndefined(mce, atom3, NULL); 2573 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2574 case Iop_Extract64: 2575 complainIfUndefined(mce, atom3, NULL); 2576 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2577 case Iop_SetElem8x8: 2578 case Iop_SetElem16x4: 2579 case Iop_SetElem32x2: 2580 complainIfUndefined(mce, atom2, NULL); 2581 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3)); 2582 default: 2583 ppIROp(op); 2584 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2585 } 2586} 2587 2588 2589static 2590IRAtom* expr2vbits_Binop ( MCEnv* mce, 2591 IROp op, 2592 IRAtom* atom1, IRAtom* atom2 ) 2593{ 2594 IRType and_or_ty; 2595 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2596 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2597 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2598 2599 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2600 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2601 2602 tl_assert(isOriginalAtom(mce,atom1)); 2603 tl_assert(isOriginalAtom(mce,atom2)); 2604 tl_assert(isShadowAtom(mce,vatom1)); 2605 tl_assert(isShadowAtom(mce,vatom2)); 2606 tl_assert(sameKindedAtoms(atom1,vatom1)); 2607 tl_assert(sameKindedAtoms(atom2,vatom2)); 2608 switch (op) { 2609 2610 /* 32-bit SIMD */ 2611 2612 case Iop_Add16x2: 2613 case Iop_HAdd16Ux2: 2614 case Iop_HAdd16Sx2: 2615 case Iop_Sub16x2: 2616 case Iop_HSub16Ux2: 2617 case Iop_HSub16Sx2: 2618 case Iop_QAdd16Sx2: 2619 case Iop_QSub16Sx2: 2620 case Iop_QSub16Ux2: 2621 return binary16Ix2(mce, vatom1, vatom2); 2622 2623 case Iop_Add8x4: 2624 case Iop_HAdd8Ux4: 2625 case Iop_HAdd8Sx4: 2626 case Iop_Sub8x4: 2627 case Iop_HSub8Ux4: 2628 case Iop_HSub8Sx4: 2629 case Iop_QSub8Ux4: 2630 case Iop_QAdd8Ux4: 2631 case Iop_QSub8Sx4: 2632 case Iop_QAdd8Sx4: 2633 return binary8Ix4(mce, vatom1, vatom2); 2634 2635 /* 64-bit SIMD */ 2636 2637 case Iop_ShrN8x8: 2638 case Iop_ShrN16x4: 2639 case Iop_ShrN32x2: 2640 case Iop_SarN8x8: 2641 case Iop_SarN16x4: 2642 case Iop_SarN32x2: 2643 case Iop_ShlN16x4: 2644 case Iop_ShlN32x2: 2645 case Iop_ShlN8x8: 2646 /* Same scheme as with all other shifts. 
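   That is: the (scalar) shift amount must itself be defined -- complain if it is not -- and the V bits of the vector argument are then shifted by that same, concrete amount.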
*/ 2647 complainIfUndefined(mce, atom2, NULL); 2648 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2649 2650 case Iop_QNarrowBin32Sto16Sx4: 2651 case Iop_QNarrowBin16Sto8Sx8: 2652 case Iop_QNarrowBin16Sto8Ux8: 2653 return vectorNarrowBin64(mce, op, vatom1, vatom2); 2654 2655 case Iop_Min8Ux8: 2656 case Iop_Min8Sx8: 2657 case Iop_Max8Ux8: 2658 case Iop_Max8Sx8: 2659 case Iop_Avg8Ux8: 2660 case Iop_QSub8Sx8: 2661 case Iop_QSub8Ux8: 2662 case Iop_Sub8x8: 2663 case Iop_CmpGT8Sx8: 2664 case Iop_CmpGT8Ux8: 2665 case Iop_CmpEQ8x8: 2666 case Iop_QAdd8Sx8: 2667 case Iop_QAdd8Ux8: 2668 case Iop_QSal8x8: 2669 case Iop_QShl8x8: 2670 case Iop_Add8x8: 2671 case Iop_Mul8x8: 2672 case Iop_PolynomialMul8x8: 2673 return binary8Ix8(mce, vatom1, vatom2); 2674 2675 case Iop_Min16Sx4: 2676 case Iop_Min16Ux4: 2677 case Iop_Max16Sx4: 2678 case Iop_Max16Ux4: 2679 case Iop_Avg16Ux4: 2680 case Iop_QSub16Ux4: 2681 case Iop_QSub16Sx4: 2682 case Iop_Sub16x4: 2683 case Iop_Mul16x4: 2684 case Iop_MulHi16Sx4: 2685 case Iop_MulHi16Ux4: 2686 case Iop_CmpGT16Sx4: 2687 case Iop_CmpGT16Ux4: 2688 case Iop_CmpEQ16x4: 2689 case Iop_QAdd16Sx4: 2690 case Iop_QAdd16Ux4: 2691 case Iop_QSal16x4: 2692 case Iop_QShl16x4: 2693 case Iop_Add16x4: 2694 case Iop_QDMulHi16Sx4: 2695 case Iop_QRDMulHi16Sx4: 2696 return binary16Ix4(mce, vatom1, vatom2); 2697 2698 case Iop_Sub32x2: 2699 case Iop_Mul32x2: 2700 case Iop_Max32Sx2: 2701 case Iop_Max32Ux2: 2702 case Iop_Min32Sx2: 2703 case Iop_Min32Ux2: 2704 case Iop_CmpGT32Sx2: 2705 case Iop_CmpGT32Ux2: 2706 case Iop_CmpEQ32x2: 2707 case Iop_Add32x2: 2708 case Iop_QAdd32Ux2: 2709 case Iop_QAdd32Sx2: 2710 case Iop_QSub32Ux2: 2711 case Iop_QSub32Sx2: 2712 case Iop_QSal32x2: 2713 case Iop_QShl32x2: 2714 case Iop_QDMulHi32Sx2: 2715 case Iop_QRDMulHi32Sx2: 2716 return binary32Ix2(mce, vatom1, vatom2); 2717 2718 case Iop_QSub64Ux1: 2719 case Iop_QSub64Sx1: 2720 case Iop_QAdd64Ux1: 2721 case Iop_QAdd64Sx1: 2722 case Iop_QSal64x1: 2723 case Iop_QShl64x1: 2724 case Iop_Sal64x1: 2725 return binary64Ix1(mce, vatom1, vatom2); 2726 2727 case Iop_QShlN8Sx8: 2728 case Iop_QShlN8x8: 2729 case Iop_QSalN8x8: 2730 complainIfUndefined(mce, atom2, NULL); 2731 return mkPCast8x8(mce, vatom1); 2732 2733 case Iop_QShlN16Sx4: 2734 case Iop_QShlN16x4: 2735 case Iop_QSalN16x4: 2736 complainIfUndefined(mce, atom2, NULL); 2737 return mkPCast16x4(mce, vatom1); 2738 2739 case Iop_QShlN32Sx2: 2740 case Iop_QShlN32x2: 2741 case Iop_QSalN32x2: 2742 complainIfUndefined(mce, atom2, NULL); 2743 return mkPCast32x2(mce, vatom1); 2744 2745 case Iop_QShlN64Sx1: 2746 case Iop_QShlN64x1: 2747 case Iop_QSalN64x1: 2748 complainIfUndefined(mce, atom2, NULL); 2749 return mkPCast32x2(mce, vatom1); 2750 2751 case Iop_PwMax32Sx2: 2752 case Iop_PwMax32Ux2: 2753 case Iop_PwMin32Sx2: 2754 case Iop_PwMin32Ux2: 2755 case Iop_PwMax32Fx2: 2756 case Iop_PwMin32Fx2: 2757 return assignNew('V', mce, Ity_I64, 2758 binop(Iop_PwMax32Ux2, 2759 mkPCast32x2(mce, vatom1), 2760 mkPCast32x2(mce, vatom2))); 2761 2762 case Iop_PwMax16Sx4: 2763 case Iop_PwMax16Ux4: 2764 case Iop_PwMin16Sx4: 2765 case Iop_PwMin16Ux4: 2766 return assignNew('V', mce, Ity_I64, 2767 binop(Iop_PwMax16Ux4, 2768 mkPCast16x4(mce, vatom1), 2769 mkPCast16x4(mce, vatom2))); 2770 2771 case Iop_PwMax8Sx8: 2772 case Iop_PwMax8Ux8: 2773 case Iop_PwMin8Sx8: 2774 case Iop_PwMin8Ux8: 2775 return assignNew('V', mce, Ity_I64, 2776 binop(Iop_PwMax8Ux8, 2777 mkPCast8x8(mce, vatom1), 2778 mkPCast8x8(mce, vatom2))); 2779 2780 case Iop_PwAdd32x2: 2781 case Iop_PwAdd32Fx2: 2782 return 
mkPCast32x2(mce, 2783 assignNew('V', mce, Ity_I64, 2784 binop(Iop_PwAdd32x2, 2785 mkPCast32x2(mce, vatom1), 2786 mkPCast32x2(mce, vatom2)))); 2787 2788 case Iop_PwAdd16x4: 2789 return mkPCast16x4(mce, 2790 assignNew('V', mce, Ity_I64, 2791 binop(op, mkPCast16x4(mce, vatom1), 2792 mkPCast16x4(mce, vatom2)))); 2793 2794 case Iop_PwAdd8x8: 2795 return mkPCast8x8(mce, 2796 assignNew('V', mce, Ity_I64, 2797 binop(op, mkPCast8x8(mce, vatom1), 2798 mkPCast8x8(mce, vatom2)))); 2799 2800 case Iop_Shl8x8: 2801 case Iop_Shr8x8: 2802 case Iop_Sar8x8: 2803 case Iop_Sal8x8: 2804 return mkUifU64(mce, 2805 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2806 mkPCast8x8(mce,vatom2) 2807 ); 2808 2809 case Iop_Shl16x4: 2810 case Iop_Shr16x4: 2811 case Iop_Sar16x4: 2812 case Iop_Sal16x4: 2813 return mkUifU64(mce, 2814 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2815 mkPCast16x4(mce,vatom2) 2816 ); 2817 2818 case Iop_Shl32x2: 2819 case Iop_Shr32x2: 2820 case Iop_Sar32x2: 2821 case Iop_Sal32x2: 2822 return mkUifU64(mce, 2823 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2824 mkPCast32x2(mce,vatom2) 2825 ); 2826 2827 /* 64-bit data-steering */ 2828 case Iop_InterleaveLO32x2: 2829 case Iop_InterleaveLO16x4: 2830 case Iop_InterleaveLO8x8: 2831 case Iop_InterleaveHI32x2: 2832 case Iop_InterleaveHI16x4: 2833 case Iop_InterleaveHI8x8: 2834 case Iop_CatOddLanes8x8: 2835 case Iop_CatEvenLanes8x8: 2836 case Iop_CatOddLanes16x4: 2837 case Iop_CatEvenLanes16x4: 2838 case Iop_InterleaveOddLanes8x8: 2839 case Iop_InterleaveEvenLanes8x8: 2840 case Iop_InterleaveOddLanes16x4: 2841 case Iop_InterleaveEvenLanes16x4: 2842 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 2843 2844 case Iop_GetElem8x8: 2845 complainIfUndefined(mce, atom2, NULL); 2846 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2847 case Iop_GetElem16x4: 2848 complainIfUndefined(mce, atom2, NULL); 2849 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2850 case Iop_GetElem32x2: 2851 complainIfUndefined(mce, atom2, NULL); 2852 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2853 2854 /* Perm8x8: rearrange values in left arg using steering values 2855 from right arg. So rearrange the vbits in the same way but 2856 pessimise wrt steering values. */ 2857 case Iop_Perm8x8: 2858 return mkUifU64( 2859 mce, 2860 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2861 mkPCast8x8(mce, vatom2) 2862 ); 2863 2864 /* V128-bit SIMD */ 2865 2866 case Iop_ShrN8x16: 2867 case Iop_ShrN16x8: 2868 case Iop_ShrN32x4: 2869 case Iop_ShrN64x2: 2870 case Iop_SarN8x16: 2871 case Iop_SarN16x8: 2872 case Iop_SarN32x4: 2873 case Iop_SarN64x2: 2874 case Iop_ShlN8x16: 2875 case Iop_ShlN16x8: 2876 case Iop_ShlN32x4: 2877 case Iop_ShlN64x2: 2878 /* Same scheme as with all other shifts. Note: 22 Oct 05: 2879 this is wrong now, scalar shifts are done properly lazily. 2880 Vector shifts should be fixed too. */ 2881 complainIfUndefined(mce, atom2, NULL); 2882 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 2883 2884 /* V x V shifts/rotates are done using the standard lazy scheme. 
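   In other words: shift the V bits of the left arg by the original right arg, then UifU in a per-lane pessimisation of the right arg's V bits -- the vector counterpart of scalarShift above.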
*/ 2885 case Iop_Shl8x16: 2886 case Iop_Shr8x16: 2887 case Iop_Sar8x16: 2888 case Iop_Sal8x16: 2889 case Iop_Rol8x16: 2890 return mkUifUV128(mce, 2891 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2892 mkPCast8x16(mce,vatom2) 2893 ); 2894 2895 case Iop_Shl16x8: 2896 case Iop_Shr16x8: 2897 case Iop_Sar16x8: 2898 case Iop_Sal16x8: 2899 case Iop_Rol16x8: 2900 return mkUifUV128(mce, 2901 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2902 mkPCast16x8(mce,vatom2) 2903 ); 2904 2905 case Iop_Shl32x4: 2906 case Iop_Shr32x4: 2907 case Iop_Sar32x4: 2908 case Iop_Sal32x4: 2909 case Iop_Rol32x4: 2910 return mkUifUV128(mce, 2911 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2912 mkPCast32x4(mce,vatom2) 2913 ); 2914 2915 case Iop_Shl64x2: 2916 case Iop_Shr64x2: 2917 case Iop_Sar64x2: 2918 case Iop_Sal64x2: 2919 return mkUifUV128(mce, 2920 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2921 mkPCast64x2(mce,vatom2) 2922 ); 2923 2924 case Iop_F32ToFixed32Ux4_RZ: 2925 case Iop_F32ToFixed32Sx4_RZ: 2926 case Iop_Fixed32UToF32x4_RN: 2927 case Iop_Fixed32SToF32x4_RN: 2928 complainIfUndefined(mce, atom2, NULL); 2929 return mkPCast32x4(mce, vatom1); 2930 2931 case Iop_F32ToFixed32Ux2_RZ: 2932 case Iop_F32ToFixed32Sx2_RZ: 2933 case Iop_Fixed32UToF32x2_RN: 2934 case Iop_Fixed32SToF32x2_RN: 2935 complainIfUndefined(mce, atom2, NULL); 2936 return mkPCast32x2(mce, vatom1); 2937 2938 case Iop_QSub8Ux16: 2939 case Iop_QSub8Sx16: 2940 case Iop_Sub8x16: 2941 case Iop_Min8Ux16: 2942 case Iop_Min8Sx16: 2943 case Iop_Max8Ux16: 2944 case Iop_Max8Sx16: 2945 case Iop_CmpGT8Sx16: 2946 case Iop_CmpGT8Ux16: 2947 case Iop_CmpEQ8x16: 2948 case Iop_Avg8Ux16: 2949 case Iop_Avg8Sx16: 2950 case Iop_QAdd8Ux16: 2951 case Iop_QAdd8Sx16: 2952 case Iop_QSal8x16: 2953 case Iop_QShl8x16: 2954 case Iop_Add8x16: 2955 case Iop_Mul8x16: 2956 case Iop_PolynomialMul8x16: 2957 return binary8Ix16(mce, vatom1, vatom2); 2958 2959 case Iop_QSub16Ux8: 2960 case Iop_QSub16Sx8: 2961 case Iop_Sub16x8: 2962 case Iop_Mul16x8: 2963 case Iop_MulHi16Sx8: 2964 case Iop_MulHi16Ux8: 2965 case Iop_Min16Sx8: 2966 case Iop_Min16Ux8: 2967 case Iop_Max16Sx8: 2968 case Iop_Max16Ux8: 2969 case Iop_CmpGT16Sx8: 2970 case Iop_CmpGT16Ux8: 2971 case Iop_CmpEQ16x8: 2972 case Iop_Avg16Ux8: 2973 case Iop_Avg16Sx8: 2974 case Iop_QAdd16Ux8: 2975 case Iop_QAdd16Sx8: 2976 case Iop_QSal16x8: 2977 case Iop_QShl16x8: 2978 case Iop_Add16x8: 2979 case Iop_QDMulHi16Sx8: 2980 case Iop_QRDMulHi16Sx8: 2981 return binary16Ix8(mce, vatom1, vatom2); 2982 2983 case Iop_Sub32x4: 2984 case Iop_CmpGT32Sx4: 2985 case Iop_CmpGT32Ux4: 2986 case Iop_CmpEQ32x4: 2987 case Iop_QAdd32Sx4: 2988 case Iop_QAdd32Ux4: 2989 case Iop_QSub32Sx4: 2990 case Iop_QSub32Ux4: 2991 case Iop_QSal32x4: 2992 case Iop_QShl32x4: 2993 case Iop_Avg32Ux4: 2994 case Iop_Avg32Sx4: 2995 case Iop_Add32x4: 2996 case Iop_Max32Ux4: 2997 case Iop_Max32Sx4: 2998 case Iop_Min32Ux4: 2999 case Iop_Min32Sx4: 3000 case Iop_Mul32x4: 3001 case Iop_QDMulHi32Sx4: 3002 case Iop_QRDMulHi32Sx4: 3003 return binary32Ix4(mce, vatom1, vatom2); 3004 3005 case Iop_Sub64x2: 3006 case Iop_Add64x2: 3007 case Iop_CmpEQ64x2: 3008 case Iop_CmpGT64Sx2: 3009 case Iop_QSal64x2: 3010 case Iop_QShl64x2: 3011 case Iop_QAdd64Ux2: 3012 case Iop_QAdd64Sx2: 3013 case Iop_QSub64Ux2: 3014 case Iop_QSub64Sx2: 3015 return binary64Ix2(mce, vatom1, vatom2); 3016 3017 case Iop_QNarrowBin32Sto16Sx8: 3018 case Iop_QNarrowBin32Uto16Ux8: 3019 case Iop_QNarrowBin32Sto16Ux8: 3020 case Iop_QNarrowBin16Sto8Sx16: 3021 case 
Iop_QNarrowBin16Uto8Ux16: 3022 case Iop_QNarrowBin16Sto8Ux16: 3023 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 3024 3025 case Iop_Sub64Fx2: 3026 case Iop_Mul64Fx2: 3027 case Iop_Min64Fx2: 3028 case Iop_Max64Fx2: 3029 case Iop_Div64Fx2: 3030 case Iop_CmpLT64Fx2: 3031 case Iop_CmpLE64Fx2: 3032 case Iop_CmpEQ64Fx2: 3033 case Iop_CmpUN64Fx2: 3034 case Iop_Add64Fx2: 3035 return binary64Fx2(mce, vatom1, vatom2); 3036 3037 case Iop_Sub64F0x2: 3038 case Iop_Mul64F0x2: 3039 case Iop_Min64F0x2: 3040 case Iop_Max64F0x2: 3041 case Iop_Div64F0x2: 3042 case Iop_CmpLT64F0x2: 3043 case Iop_CmpLE64F0x2: 3044 case Iop_CmpEQ64F0x2: 3045 case Iop_CmpUN64F0x2: 3046 case Iop_Add64F0x2: 3047 return binary64F0x2(mce, vatom1, vatom2); 3048 3049 case Iop_Sub32Fx4: 3050 case Iop_Mul32Fx4: 3051 case Iop_Min32Fx4: 3052 case Iop_Max32Fx4: 3053 case Iop_Div32Fx4: 3054 case Iop_CmpLT32Fx4: 3055 case Iop_CmpLE32Fx4: 3056 case Iop_CmpEQ32Fx4: 3057 case Iop_CmpUN32Fx4: 3058 case Iop_CmpGT32Fx4: 3059 case Iop_CmpGE32Fx4: 3060 case Iop_Add32Fx4: 3061 case Iop_Recps32Fx4: 3062 case Iop_Rsqrts32Fx4: 3063 return binary32Fx4(mce, vatom1, vatom2); 3064 3065 case Iop_Sub32Fx2: 3066 case Iop_Mul32Fx2: 3067 case Iop_Min32Fx2: 3068 case Iop_Max32Fx2: 3069 case Iop_CmpEQ32Fx2: 3070 case Iop_CmpGT32Fx2: 3071 case Iop_CmpGE32Fx2: 3072 case Iop_Add32Fx2: 3073 case Iop_Recps32Fx2: 3074 case Iop_Rsqrts32Fx2: 3075 return binary32Fx2(mce, vatom1, vatom2); 3076 3077 case Iop_Sub32F0x4: 3078 case Iop_Mul32F0x4: 3079 case Iop_Min32F0x4: 3080 case Iop_Max32F0x4: 3081 case Iop_Div32F0x4: 3082 case Iop_CmpLT32F0x4: 3083 case Iop_CmpLE32F0x4: 3084 case Iop_CmpEQ32F0x4: 3085 case Iop_CmpUN32F0x4: 3086 case Iop_Add32F0x4: 3087 return binary32F0x4(mce, vatom1, vatom2); 3088 3089 case Iop_QShlN8Sx16: 3090 case Iop_QShlN8x16: 3091 case Iop_QSalN8x16: 3092 complainIfUndefined(mce, atom2, NULL); 3093 return mkPCast8x16(mce, vatom1); 3094 3095 case Iop_QShlN16Sx8: 3096 case Iop_QShlN16x8: 3097 case Iop_QSalN16x8: 3098 complainIfUndefined(mce, atom2, NULL); 3099 return mkPCast16x8(mce, vatom1); 3100 3101 case Iop_QShlN32Sx4: 3102 case Iop_QShlN32x4: 3103 case Iop_QSalN32x4: 3104 complainIfUndefined(mce, atom2, NULL); 3105 return mkPCast32x4(mce, vatom1); 3106 3107 case Iop_QShlN64Sx2: 3108 case Iop_QShlN64x2: 3109 case Iop_QSalN64x2: 3110 complainIfUndefined(mce, atom2, NULL); 3111 return mkPCast32x4(mce, vatom1); 3112 3113 case Iop_Mull32Sx2: 3114 case Iop_Mull32Ux2: 3115 case Iop_QDMulLong32Sx2: 3116 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 3117 mkUifU64(mce, vatom1, vatom2)); 3118 3119 case Iop_Mull16Sx4: 3120 case Iop_Mull16Ux4: 3121 case Iop_QDMulLong16Sx4: 3122 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 3123 mkUifU64(mce, vatom1, vatom2)); 3124 3125 case Iop_Mull8Sx8: 3126 case Iop_Mull8Ux8: 3127 case Iop_PolynomialMull8x8: 3128 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 3129 mkUifU64(mce, vatom1, vatom2)); 3130 3131 case Iop_PwAdd32x4: 3132 return mkPCast32x4(mce, 3133 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 3134 mkPCast32x4(mce, vatom2)))); 3135 3136 case Iop_PwAdd16x8: 3137 return mkPCast16x8(mce, 3138 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 3139 mkPCast16x8(mce, vatom2)))); 3140 3141 case Iop_PwAdd8x16: 3142 return mkPCast8x16(mce, 3143 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 3144 mkPCast8x16(mce, vatom2)))); 3145 3146 /* V128-bit data-steering */ 3147 case Iop_SetV128lo32: 3148 case Iop_SetV128lo64: 3149 case Iop_64HLtoV128: 3150 case 
Iop_InterleaveLO64x2: 3151 case Iop_InterleaveLO32x4: 3152 case Iop_InterleaveLO16x8: 3153 case Iop_InterleaveLO8x16: 3154 case Iop_InterleaveHI64x2: 3155 case Iop_InterleaveHI32x4: 3156 case Iop_InterleaveHI16x8: 3157 case Iop_InterleaveHI8x16: 3158 case Iop_CatOddLanes8x16: 3159 case Iop_CatOddLanes16x8: 3160 case Iop_CatOddLanes32x4: 3161 case Iop_CatEvenLanes8x16: 3162 case Iop_CatEvenLanes16x8: 3163 case Iop_CatEvenLanes32x4: 3164 case Iop_InterleaveOddLanes8x16: 3165 case Iop_InterleaveOddLanes16x8: 3166 case Iop_InterleaveOddLanes32x4: 3167 case Iop_InterleaveEvenLanes8x16: 3168 case Iop_InterleaveEvenLanes16x8: 3169 case Iop_InterleaveEvenLanes32x4: 3170 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 3171 3172 case Iop_GetElem8x16: 3173 complainIfUndefined(mce, atom2, NULL); 3174 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3175 case Iop_GetElem16x8: 3176 complainIfUndefined(mce, atom2, NULL); 3177 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3178 case Iop_GetElem32x4: 3179 complainIfUndefined(mce, atom2, NULL); 3180 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3181 case Iop_GetElem64x2: 3182 complainIfUndefined(mce, atom2, NULL); 3183 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3184 3185 /* Perm8x16: rearrange values in left arg using steering values 3186 from right arg. So rearrange the vbits in the same way but 3187 pessimise wrt steering values. Perm32x4 ditto. */ 3188 case Iop_Perm8x16: 3189 return mkUifUV128( 3190 mce, 3191 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3192 mkPCast8x16(mce, vatom2) 3193 ); 3194 case Iop_Perm32x4: 3195 return mkUifUV128( 3196 mce, 3197 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3198 mkPCast32x4(mce, vatom2) 3199 ); 3200 3201 /* These two take the lower half of each 16-bit lane, sign/zero 3202 extend it to 32, and multiply together, producing a 32x4 3203 result (and implicitly ignoring half the operand bits). So 3204 treat it as a bunch of independent 16x8 operations, but then 3205 do 32-bit shifts left-right to copy the lower half results 3206 (which are all 0s or all 1s due to PCasting in binary16Ix8) 3207 into the upper half of each result lane. */ 3208 case Iop_MullEven16Ux8: 3209 case Iop_MullEven16Sx8: { 3210 IRAtom* at; 3211 at = binary16Ix8(mce,vatom1,vatom2); 3212 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 3213 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 3214 return at; 3215 } 3216 3217 /* Same deal as Iop_MullEven16{S,U}x8 */ 3218 case Iop_MullEven8Ux16: 3219 case Iop_MullEven8Sx16: { 3220 IRAtom* at; 3221 at = binary8Ix16(mce,vatom1,vatom2); 3222 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 3223 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 3224 return at; 3225 } 3226 3227 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 3228 32x4 -> 16x8 laneage, discarding the upper half of each lane. 3229 Simply apply same op to the V bits, since this really no more 3230 than a data steering operation. */ 3231 case Iop_NarrowBin32to16x8: 3232 case Iop_NarrowBin16to8x16: 3233 return assignNew('V', mce, Ity_V128, 3234 binop(op, vatom1, vatom2)); 3235 3236 case Iop_ShrV128: 3237 case Iop_ShlV128: 3238 /* Same scheme as with all other shifts. Note: 10 Nov 05: 3239 this is wrong now, scalar shifts are done properly lazily. 3240 Vector shifts should be fixed too. 
*/ 3241 complainIfUndefined(mce, atom2, NULL); 3242 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3243 3244 /* I128-bit data-steering */ 3245 case Iop_64HLto128: 3246 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 3247 3248 /* V256-bit SIMD */ 3249 3250 case Iop_Add64Fx4: 3251 case Iop_Sub64Fx4: 3252 case Iop_Mul64Fx4: 3253 case Iop_Div64Fx4: 3254 case Iop_Max64Fx4: 3255 case Iop_Min64Fx4: 3256 return binary64Fx4(mce, vatom1, vatom2); 3257 3258 case Iop_Add32Fx8: 3259 case Iop_Sub32Fx8: 3260 case Iop_Mul32Fx8: 3261 case Iop_Div32Fx8: 3262 case Iop_Max32Fx8: 3263 case Iop_Min32Fx8: 3264 return binary32Fx8(mce, vatom1, vatom2); 3265 3266 /* V256-bit data-steering */ 3267 case Iop_V128HLtoV256: 3268 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2)); 3269 3270 /* Scalar floating point */ 3271 3272 case Iop_F32toI64S: 3273 case Iop_F32toI64U: 3274 /* I32(rm) x F32 -> I64 */ 3275 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3276 3277 case Iop_I64StoF32: 3278 /* I32(rm) x I64 -> F32 */ 3279 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3280 3281 case Iop_RoundF64toInt: 3282 case Iop_RoundF64toF32: 3283 case Iop_F64toI64S: 3284 case Iop_F64toI64U: 3285 case Iop_I64StoF64: 3286 case Iop_I64UtoF64: 3287 case Iop_SinF64: 3288 case Iop_CosF64: 3289 case Iop_TanF64: 3290 case Iop_2xm1F64: 3291 case Iop_SqrtF64: 3292 /* I32(rm) x I64/F64 -> I64/F64 */ 3293 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3294 3295 case Iop_ShlD64: 3296 case Iop_ShrD64: 3297 case Iop_RoundD64toInt: 3298 /* I32(DFP rm) x D64 -> D64 */ 3299 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3300 3301 case Iop_ShlD128: 3302 case Iop_ShrD128: 3303 case Iop_RoundD128toInt: 3304 /* I32(DFP rm) x D128 -> D128 */ 3305 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3306 3307 case Iop_D64toI64S: 3308 case Iop_I64StoD64: 3309 /* I64(DFP rm) x I64 -> D64 */ 3310 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3311 3312 case Iop_RoundF32toInt: 3313 case Iop_SqrtF32: 3314 /* I32(rm) x I32/F32 -> I32/F32 */ 3315 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3316 3317 case Iop_SqrtF128: 3318 /* I32(rm) x F128 -> F128 */ 3319 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3320 3321 case Iop_I32StoF32: 3322 case Iop_I32UtoF32: 3323 case Iop_F32toI32S: 3324 case Iop_F32toI32U: 3325 /* First arg is I32 (rounding mode), second is F32/I32 (data). */ 3326 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3327 3328 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */ 3329 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */ 3330 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */ 3331 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3332 3333 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */ 3334 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */ 3335 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */ 3336 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */ 3337 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */ 3338 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3339 3340 case Iop_F64HLtoF128: 3341 case Iop_D64HLtoD128: 3342 return assignNew('V', mce, Ity_I128, 3343 binop(Iop_64HLto128, vatom1, vatom2)); 3344 3345 case Iop_F64toI32U: 3346 case Iop_F64toI32S: 3347 case Iop_F64toF32: 3348 case Iop_I64UtoF32: 3349 /* First arg is I32 (rounding mode), second is F64 (data). 
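   (For Iop_I64UtoF32 the data argument is an I64 rather than an F64, but it is treated identically.)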
*/ 3350 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3351 3352 case Iop_D64toD32: 3353 /* First arg is I64 (DFProunding mode), second is D64 (data). */ 3354 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3355 3356 case Iop_F64toI16S: 3357 /* First arg is I32 (rounding mode), second is F64 (data). */ 3358 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3359 3360 case Iop_InsertExpD64: 3361 /* I64 x I64 -> D64 */ 3362 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3363 3364 case Iop_InsertExpD128: 3365 /* I64 x I128 -> D128 */ 3366 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3367 3368 case Iop_CmpF32: 3369 case Iop_CmpF64: 3370 case Iop_CmpF128: 3371 case Iop_CmpD64: 3372 case Iop_CmpD128: 3373 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3374 3375 /* non-FP after here */ 3376 3377 case Iop_DivModU64to32: 3378 case Iop_DivModS64to32: 3379 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3380 3381 case Iop_DivModU128to64: 3382 case Iop_DivModS128to64: 3383 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3384 3385 case Iop_8HLto16: 3386 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2)); 3387 case Iop_16HLto32: 3388 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 3389 case Iop_32HLto64: 3390 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3391 3392 case Iop_DivModS64to64: 3393 case Iop_MullS64: 3394 case Iop_MullU64: { 3395 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3396 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 3397 return assignNew('V', mce, Ity_I128, 3398 binop(Iop_64HLto128, vHi64, vLo64)); 3399 } 3400 3401 case Iop_MullS32: 3402 case Iop_MullU32: { 3403 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3404 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 3405 return assignNew('V', mce, Ity_I64, 3406 binop(Iop_32HLto64, vHi32, vLo32)); 3407 } 3408 3409 case Iop_MullS16: 3410 case Iop_MullU16: { 3411 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3412 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 3413 return assignNew('V', mce, Ity_I32, 3414 binop(Iop_16HLto32, vHi16, vLo16)); 3415 } 3416 3417 case Iop_MullS8: 3418 case Iop_MullU8: { 3419 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3420 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 3421 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 3422 } 3423 3424 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. 
*/ 3425 case Iop_DivS32: 3426 case Iop_DivU32: 3427 case Iop_DivU32E: 3428 case Iop_DivS32E: 3429 case Iop_QAdd32S: /* could probably do better */ 3430 case Iop_QSub32S: /* could probably do better */ 3431 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3432 3433 case Iop_DivS64: 3434 case Iop_DivU64: 3435 case Iop_DivS64E: 3436 case Iop_DivU64E: 3437 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3438 3439 case Iop_Add32: 3440 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3441 return expensiveAddSub(mce,True,Ity_I32, 3442 vatom1,vatom2, atom1,atom2); 3443 else 3444 goto cheap_AddSub32; 3445 case Iop_Sub32: 3446 if (mce->bogusLiterals) 3447 return expensiveAddSub(mce,False,Ity_I32, 3448 vatom1,vatom2, atom1,atom2); 3449 else 3450 goto cheap_AddSub32; 3451 3452 cheap_AddSub32: 3453 case Iop_Mul32: 3454 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3455 3456 case Iop_CmpORD32S: 3457 case Iop_CmpORD32U: 3458 case Iop_CmpORD64S: 3459 case Iop_CmpORD64U: 3460 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 3461 3462 case Iop_Add64: 3463 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3464 return expensiveAddSub(mce,True,Ity_I64, 3465 vatom1,vatom2, atom1,atom2); 3466 else 3467 goto cheap_AddSub64; 3468 case Iop_Sub64: 3469 if (mce->bogusLiterals) 3470 return expensiveAddSub(mce,False,Ity_I64, 3471 vatom1,vatom2, atom1,atom2); 3472 else 3473 goto cheap_AddSub64; 3474 3475 cheap_AddSub64: 3476 case Iop_Mul64: 3477 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3478 3479 case Iop_Mul16: 3480 case Iop_Add16: 3481 case Iop_Sub16: 3482 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3483 3484 case Iop_Mul8: 3485 case Iop_Sub8: 3486 case Iop_Add8: 3487 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3488 3489 case Iop_CmpEQ64: 3490 case Iop_CmpNE64: 3491 if (mce->bogusLiterals) 3492 goto expensive_cmp64; 3493 else 3494 goto cheap_cmp64; 3495 3496 expensive_cmp64: 3497 case Iop_ExpCmpNE64: 3498 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 3499 3500 cheap_cmp64: 3501 case Iop_CmpLE64S: case Iop_CmpLE64U: 3502 case Iop_CmpLT64U: case Iop_CmpLT64S: 3503 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 3504 3505 case Iop_CmpEQ32: 3506 case Iop_CmpNE32: 3507 if (mce->bogusLiterals) 3508 goto expensive_cmp32; 3509 else 3510 goto cheap_cmp32; 3511 3512 expensive_cmp32: 3513 case Iop_ExpCmpNE32: 3514 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 3515 3516 cheap_cmp32: 3517 case Iop_CmpLE32S: case Iop_CmpLE32U: 3518 case Iop_CmpLT32U: case Iop_CmpLT32S: 3519 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 3520 3521 case Iop_CmpEQ16: case Iop_CmpNE16: 3522 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 3523 3524 case Iop_ExpCmpNE16: 3525 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 ); 3526 3527 case Iop_CmpEQ8: case Iop_CmpNE8: 3528 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 3529 3530 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 3531 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 3532 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 3533 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 3534 /* Just say these all produce a defined result, regardless 3535 of their arguments. See COMMENT_ON_CasCmpEQ in this file. 
*/ 3536 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 3537 3538 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 3539 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 3540 3541 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 3542 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 3543 3544 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 3545 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 3546 3547 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8: 3548 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 3549 3550 case Iop_AndV256: 3551 uifu = mkUifUV256; difd = mkDifDV256; 3552 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or; 3553 case Iop_AndV128: 3554 uifu = mkUifUV128; difd = mkDifDV128; 3555 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 3556 case Iop_And64: 3557 uifu = mkUifU64; difd = mkDifD64; 3558 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 3559 case Iop_And32: 3560 uifu = mkUifU32; difd = mkDifD32; 3561 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 3562 case Iop_And16: 3563 uifu = mkUifU16; difd = mkDifD16; 3564 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 3565 case Iop_And8: 3566 uifu = mkUifU8; difd = mkDifD8; 3567 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 3568 3569 case Iop_OrV256: 3570 uifu = mkUifUV256; difd = mkDifDV256; 3571 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or; 3572 case Iop_OrV128: 3573 uifu = mkUifUV128; difd = mkDifDV128; 3574 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 3575 case Iop_Or64: 3576 uifu = mkUifU64; difd = mkDifD64; 3577 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 3578 case Iop_Or32: 3579 uifu = mkUifU32; difd = mkDifD32; 3580 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 3581 case Iop_Or16: 3582 uifu = mkUifU16; difd = mkDifD16; 3583 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 3584 case Iop_Or8: 3585 uifu = mkUifU8; difd = mkDifD8; 3586 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 3587 3588 do_And_Or: 3589 return 3590 assignNew( 3591 'V', mce, 3592 and_or_ty, 3593 difd(mce, uifu(mce, vatom1, vatom2), 3594 difd(mce, improve(mce, atom1, vatom1), 3595 improve(mce, atom2, vatom2) ) ) ); 3596 3597 case Iop_Xor8: 3598 return mkUifU8(mce, vatom1, vatom2); 3599 case Iop_Xor16: 3600 return mkUifU16(mce, vatom1, vatom2); 3601 case Iop_Xor32: 3602 return mkUifU32(mce, vatom1, vatom2); 3603 case Iop_Xor64: 3604 return mkUifU64(mce, vatom1, vatom2); 3605 case Iop_XorV128: 3606 return mkUifUV128(mce, vatom1, vatom2); 3607 case Iop_XorV256: 3608 return mkUifUV256(mce, vatom1, vatom2); 3609 3610 default: 3611 ppIROp(op); 3612 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 3613 } 3614} 3615 3616 3617static 3618IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 3619{ 3620 IRAtom* vatom = expr2vbits( mce, atom ); 3621 tl_assert(isOriginalAtom(mce,atom)); 3622 switch (op) { 3623 3624 case Iop_Sqrt64Fx2: 3625 return unary64Fx2(mce, vatom); 3626 3627 case Iop_Sqrt64F0x2: 3628 return unary64F0x2(mce, vatom); 3629 3630 case Iop_Sqrt32Fx8: 3631 case Iop_RSqrt32Fx8: 3632 case Iop_Recip32Fx8: 3633 return unary32Fx8(mce, vatom); 3634 3635 case Iop_Sqrt64Fx4: 3636 return unary64Fx4(mce, vatom); 3637 3638 case Iop_Sqrt32Fx4: 3639 case Iop_RSqrt32Fx4: 3640 case Iop_Recip32Fx4: 3641 case Iop_I32UtoFx4: 3642 case Iop_I32StoFx4: 3643 case Iop_QFtoI32Ux4_RZ: 3644 case 
Iop_QFtoI32Sx4_RZ: 3645 case Iop_RoundF32x4_RM: 3646 case Iop_RoundF32x4_RP: 3647 case Iop_RoundF32x4_RN: 3648 case Iop_RoundF32x4_RZ: 3649 case Iop_Recip32x4: 3650 case Iop_Abs32Fx4: 3651 case Iop_Neg32Fx4: 3652 case Iop_Rsqrte32Fx4: 3653 return unary32Fx4(mce, vatom); 3654 3655 case Iop_I32UtoFx2: 3656 case Iop_I32StoFx2: 3657 case Iop_Recip32Fx2: 3658 case Iop_Recip32x2: 3659 case Iop_Abs32Fx2: 3660 case Iop_Neg32Fx2: 3661 case Iop_Rsqrte32Fx2: 3662 return unary32Fx2(mce, vatom); 3663 3664 case Iop_Sqrt32F0x4: 3665 case Iop_RSqrt32F0x4: 3666 case Iop_Recip32F0x4: 3667 return unary32F0x4(mce, vatom); 3668 3669 case Iop_32UtoV128: 3670 case Iop_64UtoV128: 3671 case Iop_Dup8x16: 3672 case Iop_Dup16x8: 3673 case Iop_Dup32x4: 3674 case Iop_Reverse16_8x16: 3675 case Iop_Reverse32_8x16: 3676 case Iop_Reverse32_16x8: 3677 case Iop_Reverse64_8x16: 3678 case Iop_Reverse64_16x8: 3679 case Iop_Reverse64_32x4: 3680 case Iop_V256toV128_1: case Iop_V256toV128_0: 3681 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 3682 3683 case Iop_F128HItoF64: /* F128 -> high half of F128 */ 3684 case Iop_D128HItoD64: /* D128 -> high half of D128 */ 3685 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom)); 3686 case Iop_F128LOtoF64: /* F128 -> low half of F128 */ 3687 case Iop_D128LOtoD64: /* D128 -> low half of D128 */ 3688 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom)); 3689 3690 case Iop_NegF128: 3691 case Iop_AbsF128: 3692 return mkPCastTo(mce, Ity_I128, vatom); 3693 3694 case Iop_I32StoF128: /* signed I32 -> F128 */ 3695 case Iop_I64StoF128: /* signed I64 -> F128 */ 3696 case Iop_I32UtoF128: /* unsigned I32 -> F128 */ 3697 case Iop_I64UtoF128: /* unsigned I64 -> F128 */ 3698 case Iop_F32toF128: /* F32 -> F128 */ 3699 case Iop_F64toF128: /* F64 -> F128 */ 3700 case Iop_I64StoD128: /* signed I64 -> D128 */ 3701 return mkPCastTo(mce, Ity_I128, vatom); 3702 3703 case Iop_F32toF64: 3704 case Iop_I32StoF64: 3705 case Iop_I32UtoF64: 3706 case Iop_NegF64: 3707 case Iop_AbsF64: 3708 case Iop_Est5FRSqrt: 3709 case Iop_RoundF64toF64_NEAREST: 3710 case Iop_RoundF64toF64_NegINF: 3711 case Iop_RoundF64toF64_PosINF: 3712 case Iop_RoundF64toF64_ZERO: 3713 case Iop_Clz64: 3714 case Iop_D32toD64: 3715 case Iop_ExtractExpD64: /* D64 -> I64 */ 3716 case Iop_ExtractExpD128: /* D128 -> I64 */ 3717 case Iop_DPBtoBCD: 3718 case Iop_BCDtoDPB: 3719 return mkPCastTo(mce, Ity_I64, vatom); 3720 3721 case Iop_D64toD128: 3722 return mkPCastTo(mce, Ity_I128, vatom); 3723 3724 case Iop_Clz32: 3725 case Iop_TruncF64asF32: 3726 case Iop_NegF32: 3727 case Iop_AbsF32: 3728 return mkPCastTo(mce, Ity_I32, vatom); 3729 3730 case Iop_Ctz32: 3731 case Iop_Ctz64: 3732 return expensiveCountTrailingZeroes(mce, op, atom, vatom); 3733 3734 case Iop_1Uto64: 3735 case Iop_1Sto64: 3736 case Iop_8Uto64: 3737 case Iop_8Sto64: 3738 case Iop_16Uto64: 3739 case Iop_16Sto64: 3740 case Iop_32Sto64: 3741 case Iop_32Uto64: 3742 case Iop_V128to64: 3743 case Iop_V128HIto64: 3744 case Iop_128HIto64: 3745 case Iop_128to64: 3746 case Iop_Dup8x8: 3747 case Iop_Dup16x4: 3748 case Iop_Dup32x2: 3749 case Iop_Reverse16_8x8: 3750 case Iop_Reverse32_8x8: 3751 case Iop_Reverse32_16x4: 3752 case Iop_Reverse64_8x8: 3753 case Iop_Reverse64_16x4: 3754 case Iop_Reverse64_32x2: 3755 case Iop_V256to64_0: case Iop_V256to64_1: 3756 case Iop_V256to64_2: case Iop_V256to64_3: 3757 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 3758 3759 case Iop_64to32: 3760 case Iop_64HIto32: 3761 case Iop_1Uto32: 3762 case Iop_1Sto32: 3763 case Iop_8Uto32: 3764 case 
Iop_16Uto32: 3765 case Iop_16Sto32: 3766 case Iop_8Sto32: 3767 case Iop_V128to32: 3768 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 3769 3770 case Iop_8Sto16: 3771 case Iop_8Uto16: 3772 case Iop_32to16: 3773 case Iop_32HIto16: 3774 case Iop_64to16: 3775 case Iop_GetMSBs8x16: 3776 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 3777 3778 case Iop_1Uto8: 3779 case Iop_1Sto8: 3780 case Iop_16to8: 3781 case Iop_16HIto8: 3782 case Iop_32to8: 3783 case Iop_64to8: 3784 case Iop_GetMSBs8x8: 3785 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 3786 3787 case Iop_32to1: 3788 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 3789 3790 case Iop_64to1: 3791 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 3792 3793 case Iop_ReinterpF64asI64: 3794 case Iop_ReinterpI64asF64: 3795 case Iop_ReinterpI32asF32: 3796 case Iop_ReinterpF32asI32: 3797 case Iop_ReinterpI64asD64: 3798 case Iop_ReinterpD64asI64: 3799 case Iop_NotV256: 3800 case Iop_NotV128: 3801 case Iop_Not64: 3802 case Iop_Not32: 3803 case Iop_Not16: 3804 case Iop_Not8: 3805 case Iop_Not1: 3806 return vatom; 3807 3808 case Iop_CmpNEZ8x8: 3809 case Iop_Cnt8x8: 3810 case Iop_Clz8Sx8: 3811 case Iop_Cls8Sx8: 3812 case Iop_Abs8x8: 3813 return mkPCast8x8(mce, vatom); 3814 3815 case Iop_CmpNEZ8x16: 3816 case Iop_Cnt8x16: 3817 case Iop_Clz8Sx16: 3818 case Iop_Cls8Sx16: 3819 case Iop_Abs8x16: 3820 return mkPCast8x16(mce, vatom); 3821 3822 case Iop_CmpNEZ16x4: 3823 case Iop_Clz16Sx4: 3824 case Iop_Cls16Sx4: 3825 case Iop_Abs16x4: 3826 return mkPCast16x4(mce, vatom); 3827 3828 case Iop_CmpNEZ16x8: 3829 case Iop_Clz16Sx8: 3830 case Iop_Cls16Sx8: 3831 case Iop_Abs16x8: 3832 return mkPCast16x8(mce, vatom); 3833 3834 case Iop_CmpNEZ32x2: 3835 case Iop_Clz32Sx2: 3836 case Iop_Cls32Sx2: 3837 case Iop_FtoI32Ux2_RZ: 3838 case Iop_FtoI32Sx2_RZ: 3839 case Iop_Abs32x2: 3840 return mkPCast32x2(mce, vatom); 3841 3842 case Iop_CmpNEZ32x4: 3843 case Iop_Clz32Sx4: 3844 case Iop_Cls32Sx4: 3845 case Iop_FtoI32Ux4_RZ: 3846 case Iop_FtoI32Sx4_RZ: 3847 case Iop_Abs32x4: 3848 return mkPCast32x4(mce, vatom); 3849 3850 case Iop_CmpwNEZ32: 3851 return mkPCastTo(mce, Ity_I32, vatom); 3852 3853 case Iop_CmpwNEZ64: 3854 return mkPCastTo(mce, Ity_I64, vatom); 3855 3856 case Iop_CmpNEZ64x2: 3857 return mkPCast64x2(mce, vatom); 3858 3859 case Iop_NarrowUn16to8x8: 3860 case Iop_NarrowUn32to16x4: 3861 case Iop_NarrowUn64to32x2: 3862 case Iop_QNarrowUn16Sto8Sx8: 3863 case Iop_QNarrowUn16Sto8Ux8: 3864 case Iop_QNarrowUn16Uto8Ux8: 3865 case Iop_QNarrowUn32Sto16Sx4: 3866 case Iop_QNarrowUn32Sto16Ux4: 3867 case Iop_QNarrowUn32Uto16Ux4: 3868 case Iop_QNarrowUn64Sto32Sx2: 3869 case Iop_QNarrowUn64Sto32Ux2: 3870 case Iop_QNarrowUn64Uto32Ux2: 3871 return vectorNarrowUnV128(mce, op, vatom); 3872 3873 case Iop_Widen8Sto16x8: 3874 case Iop_Widen8Uto16x8: 3875 case Iop_Widen16Sto32x4: 3876 case Iop_Widen16Uto32x4: 3877 case Iop_Widen32Sto64x2: 3878 case Iop_Widen32Uto64x2: 3879 return vectorWidenI64(mce, op, vatom); 3880 3881 case Iop_PwAddL32Ux2: 3882 case Iop_PwAddL32Sx2: 3883 return mkPCastTo(mce, Ity_I64, 3884 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 3885 3886 case Iop_PwAddL16Ux4: 3887 case Iop_PwAddL16Sx4: 3888 return mkPCast32x2(mce, 3889 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 3890 3891 case Iop_PwAddL8Ux8: 3892 case Iop_PwAddL8Sx8: 3893 return mkPCast16x4(mce, 3894 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 3895 3896 case Iop_PwAddL32Ux4: 3897 case Iop_PwAddL32Sx4: 3898 return 
mkPCast64x2(mce, 3899 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 3900 3901 case Iop_PwAddL16Ux8: 3902 case Iop_PwAddL16Sx8: 3903 return mkPCast32x4(mce, 3904 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 3905 3906 case Iop_PwAddL8Ux16: 3907 case Iop_PwAddL8Sx16: 3908 return mkPCast16x8(mce, 3909 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 3910 3911 case Iop_I64UtoF32: 3912 default: 3913 ppIROp(op); 3914 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 3915 } 3916} 3917 3918 3919/* Worker function; do not call directly. */ 3920static 3921IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 3922 IREndness end, IRType ty, 3923 IRAtom* addr, UInt bias ) 3924{ 3925 void* helper; 3926 const HChar* hname; 3927 IRDirty* di; 3928 IRTemp datavbits; 3929 IRAtom* addrAct; 3930 3931 tl_assert(isOriginalAtom(mce,addr)); 3932 tl_assert(end == Iend_LE || end == Iend_BE); 3933 3934 /* First, emit a definedness test for the address. This also sets 3935 the address (shadow) to 'defined' following the test. */ 3936 complainIfUndefined( mce, addr, NULL ); 3937 3938 /* Now cook up a call to the relevant helper function, to read the 3939 data V bits from shadow memory. */ 3940 ty = shadowTypeV(ty); 3941 3942 if (end == Iend_LE) { 3943 switch (ty) { 3944 case Ity_I64: helper = &MC_(helperc_LOADV64le); 3945 hname = "MC_(helperc_LOADV64le)"; 3946 break; 3947 case Ity_I32: helper = &MC_(helperc_LOADV32le); 3948 hname = "MC_(helperc_LOADV32le)"; 3949 break; 3950 case Ity_I16: helper = &MC_(helperc_LOADV16le); 3951 hname = "MC_(helperc_LOADV16le)"; 3952 break; 3953 case Ity_I8: helper = &MC_(helperc_LOADV8); 3954 hname = "MC_(helperc_LOADV8)"; 3955 break; 3956 default: ppIRType(ty); 3957 VG_(tool_panic)("memcheck:do_shadow_Load(LE)"); 3958 } 3959 } else { 3960 switch (ty) { 3961 case Ity_I64: helper = &MC_(helperc_LOADV64be); 3962 hname = "MC_(helperc_LOADV64be)"; 3963 break; 3964 case Ity_I32: helper = &MC_(helperc_LOADV32be); 3965 hname = "MC_(helperc_LOADV32be)"; 3966 break; 3967 case Ity_I16: helper = &MC_(helperc_LOADV16be); 3968 hname = "MC_(helperc_LOADV16be)"; 3969 break; 3970 case Ity_I8: helper = &MC_(helperc_LOADV8); 3971 hname = "MC_(helperc_LOADV8)"; 3972 break; 3973 default: ppIRType(ty); 3974 VG_(tool_panic)("memcheck:do_shadow_Load(BE)"); 3975 } 3976 } 3977 3978 /* Generate the actual address into addrAct. */ 3979 if (bias == 0) { 3980 addrAct = addr; 3981 } else { 3982 IROp mkAdd; 3983 IRAtom* eBias; 3984 IRType tyAddr = mce->hWordTy; 3985 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3986 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3987 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3988 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 3989 } 3990 3991 /* We need to have a place to park the V bits we're just about to 3992 read. 
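   Added illustration: at run time the dirty call built below behaves,
   conceptually, like the toy helper sketched here.  The real
   MC_(helperc_LOADV32le) and friends consult Memcheck's compressed shadow
   map; the flat per-byte V array and the name are assumptions made only
   for this sketch.

      #include <stdint.h>
      // gather the V byte of each data byte, low byte first; 0 bits = defined
      static uint32_t toy_LOADV32le(const uint8_t *vbyte_of, uintptr_t a)
      {
         uint32_t v = 0;
         for (int i = 0; i < 4; i++)
            v |= (uint32_t)vbyte_of[a + i] << (8 * i);
         return v;
      }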
*/ 3993 datavbits = newTemp(mce, ty, VSh); 3994 di = unsafeIRDirty_1_N( datavbits, 3995 1/*regparms*/, 3996 hname, VG_(fnptr_to_fnentry)( helper ), 3997 mkIRExprVec_1( addrAct )); 3998 setHelperAnns( mce, di ); 3999 stmt( 'V', mce, IRStmt_Dirty(di) ); 4000 4001 return mkexpr(datavbits); 4002} 4003 4004 4005static 4006IRAtom* expr2vbits_Load ( MCEnv* mce, 4007 IREndness end, IRType ty, 4008 IRAtom* addr, UInt bias ) 4009{ 4010 tl_assert(end == Iend_LE || end == Iend_BE); 4011 switch (shadowTypeV(ty)) { 4012 case Ity_I8: 4013 case Ity_I16: 4014 case Ity_I32: 4015 case Ity_I64: 4016 return expr2vbits_Load_WRK(mce, end, ty, addr, bias); 4017 case Ity_V128: { 4018 IRAtom *v64hi, *v64lo; 4019 if (end == Iend_LE) { 4020 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 4021 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 4022 } else { 4023 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 4024 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 4025 } 4026 return assignNew( 'V', mce, 4027 Ity_V128, 4028 binop(Iop_64HLtoV128, v64hi, v64lo)); 4029 } 4030 case Ity_V256: { 4031 /* V256-bit case -- phrased in terms of 64 bit units (Qs), 4032 with Q3 being the most significant lane. */ 4033 if (end == Iend_BE) goto unhandled; 4034 IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 4035 IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 4036 IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16); 4037 IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24); 4038 return assignNew( 'V', mce, 4039 Ity_V256, 4040 IRExpr_Qop(Iop_64x4toV256, 4041 v64Q3, v64Q2, v64Q1, v64Q0)); 4042 } 4043 unhandled: 4044 default: 4045 VG_(tool_panic)("expr2vbits_Load"); 4046 } 4047} 4048 4049 4050/* If there is no guard expression or the guard is always TRUE this function 4051 behaves like expr2vbits_Load. If the guard is not true at runtime, an 4052 all-bits-defined bit pattern will be returned. 4053 It is assumed that definedness of GUARD has already been checked at the call 4054 site. */ 4055static 4056IRAtom* expr2vbits_guarded_Load ( MCEnv* mce, 4057 IREndness end, IRType ty, 4058 IRAtom* addr, UInt bias, IRAtom *guard ) 4059{ 4060 if (guard) { 4061 IRAtom *cond, *iffalse, *iftrue; 4062 4063 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard)); 4064 iftrue = assignNew('V', mce, ty, 4065 expr2vbits_Load(mce, end, ty, addr, bias)); 4066 iffalse = assignNew('V', mce, ty, definedOfType(ty)); 4067 4068 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue)); 4069 } 4070 4071 /* No guard expression or unconditional load */ 4072 return expr2vbits_Load(mce, end, ty, addr, bias); 4073} 4074 4075 4076static 4077IRAtom* expr2vbits_Mux0X ( MCEnv* mce, 4078 IRAtom* cond, IRAtom* expr0, IRAtom* exprX ) 4079{ 4080 IRAtom *vbitsC, *vbits0, *vbitsX; 4081 IRType ty; 4082 /* Given Mux0X(cond,expr0,exprX), generate 4083 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#) 4084 That is, steer the V bits like the originals, but trash the 4085 result if the steering value is undefined. This gives 4086 lazy propagation. 
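   Added illustration of that rule, spelled out in plain C on concrete
   shadow values (8-bit case; the name is invented, this is not IR):

      #include <stdint.h>
      static uint8_t mux0x_vbits(uint8_t cond, uint8_t condV,
                                 uint8_t v0, uint8_t vX)
      {
         uint8_t steered = (cond == 0) ? v0 : vX;   // follow the original Mux0X
         uint8_t poison  = condV ? 0xFF : 0x00;     // PCast of the condition's V bits
         return steered | poison;                   // UifU is bitwise OR
      }

   So a fully defined condition (condV == 0) just passes the selected
   operand's V bits through, while any undefined condition bit makes the
   whole result undefined.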
*/ 4087 tl_assert(isOriginalAtom(mce, cond)); 4088 tl_assert(isOriginalAtom(mce, expr0)); 4089 tl_assert(isOriginalAtom(mce, exprX)); 4090 4091 vbitsC = expr2vbits(mce, cond); 4092 vbits0 = expr2vbits(mce, expr0); 4093 vbitsX = expr2vbits(mce, exprX); 4094 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 4095 4096 return 4097 mkUifU(mce, ty, assignNew('V', mce, ty, 4098 IRExpr_Mux0X(cond, vbits0, vbitsX)), 4099 mkPCastTo(mce, ty, vbitsC) ); 4100} 4101 4102/* --------- This is the main expression-handling function. --------- */ 4103 4104static 4105IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 4106{ 4107 switch (e->tag) { 4108 4109 case Iex_Get: 4110 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 4111 4112 case Iex_GetI: 4113 return shadow_GETI( mce, e->Iex.GetI.descr, 4114 e->Iex.GetI.ix, e->Iex.GetI.bias ); 4115 4116 case Iex_RdTmp: 4117 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 4118 4119 case Iex_Const: 4120 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 4121 4122 case Iex_Qop: 4123 return expr2vbits_Qop( 4124 mce, 4125 e->Iex.Qop.details->op, 4126 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2, 4127 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4 4128 ); 4129 4130 case Iex_Triop: 4131 return expr2vbits_Triop( 4132 mce, 4133 e->Iex.Triop.details->op, 4134 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2, 4135 e->Iex.Triop.details->arg3 4136 ); 4137 4138 case Iex_Binop: 4139 return expr2vbits_Binop( 4140 mce, 4141 e->Iex.Binop.op, 4142 e->Iex.Binop.arg1, e->Iex.Binop.arg2 4143 ); 4144 4145 case Iex_Unop: 4146 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 4147 4148 case Iex_Load: 4149 return expr2vbits_Load( mce, e->Iex.Load.end, 4150 e->Iex.Load.ty, 4151 e->Iex.Load.addr, 0/*addr bias*/ ); 4152 4153 case Iex_CCall: 4154 return mkLazyN( mce, e->Iex.CCall.args, 4155 e->Iex.CCall.retty, 4156 e->Iex.CCall.cee ); 4157 4158 case Iex_Mux0X: 4159 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0, 4160 e->Iex.Mux0X.exprX); 4161 4162 default: 4163 VG_(printf)("\n"); 4164 ppIRExpr(e); 4165 VG_(printf)("\n"); 4166 VG_(tool_panic)("memcheck: expr2vbits"); 4167 } 4168} 4169 4170/*------------------------------------------------------------*/ 4171/*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 4172/*------------------------------------------------------------*/ 4173 4174/* Widen a value to the host word size. */ 4175 4176static 4177IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 4178{ 4179 IRType ty, tyH; 4180 4181 /* vatom is vbits-value and as such can only have a shadow type. 
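   Added note: the widening below is done with zero-extension, so the
   freshly created upper bits read as 0, i.e. "defined".  That is harmless
   because the store helpers this feeds only consult the low-order bits
   that correspond to the value actually stored.  As a one-line sketch
   (invented name, plain C):

      #include <stdint.h>
      static uint64_t zwiden_v16_to_64(uint16_t v16) { return (uint64_t)v16; }
      // upper 48 V bits come out as 0 ("defined"); only the low 16 matter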
*/ 4182 tl_assert(isShadowAtom(mce,vatom)); 4183 4184 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 4185 tyH = mce->hWordTy; 4186 4187 if (tyH == Ity_I32) { 4188 switch (ty) { 4189 case Ity_I32: 4190 return vatom; 4191 case Ity_I16: 4192 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 4193 case Ity_I8: 4194 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 4195 default: 4196 goto unhandled; 4197 } 4198 } else 4199 if (tyH == Ity_I64) { 4200 switch (ty) { 4201 case Ity_I32: 4202 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 4203 case Ity_I16: 4204 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4205 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 4206 case Ity_I8: 4207 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4208 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 4209 default: 4210 goto unhandled; 4211 } 4212 } else { 4213 goto unhandled; 4214 } 4215 unhandled: 4216 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 4217 VG_(tool_panic)("zwidenToHostWord"); 4218} 4219 4220 4221/* Generate a shadow store. addr is always the original address atom. 4222 You can pass in either originals or V-bits for the data atom, but 4223 obviously not both. guard :: Ity_I1 controls whether the store 4224 really happens; NULL means it unconditionally does. Note that 4225 guard itself is not checked for definedness; the caller of this 4226 function must do that if necessary. */ 4227 4228static 4229void do_shadow_Store ( MCEnv* mce, 4230 IREndness end, 4231 IRAtom* addr, UInt bias, 4232 IRAtom* data, IRAtom* vdata, 4233 IRAtom* guard ) 4234{ 4235 IROp mkAdd; 4236 IRType ty, tyAddr; 4237 void* helper = NULL; 4238 const HChar* hname = NULL; 4239 IRConst* c; 4240 4241 tyAddr = mce->hWordTy; 4242 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 4243 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 4244 tl_assert( end == Iend_LE || end == Iend_BE ); 4245 4246 if (data) { 4247 tl_assert(!vdata); 4248 tl_assert(isOriginalAtom(mce, data)); 4249 tl_assert(bias == 0); 4250 vdata = expr2vbits( mce, data ); 4251 } else { 4252 tl_assert(vdata); 4253 } 4254 4255 tl_assert(isOriginalAtom(mce,addr)); 4256 tl_assert(isShadowAtom(mce,vdata)); 4257 4258 if (guard) { 4259 tl_assert(isOriginalAtom(mce, guard)); 4260 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 4261 } 4262 4263 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 4264 4265 // If we're not doing undefined value checking, pretend that this value 4266 // is "all valid". That lets Vex's optimiser remove some of the V bit 4267 // shadow computation ops that precede it. 4268 if (MC_(clo_mc_level) == 1) { 4269 switch (ty) { 4270 case Ity_V256: // V256 weirdness -- used four times 4271 c = IRConst_V256(V_BITS32_DEFINED); break; 4272 case Ity_V128: // V128 weirdness -- used twice 4273 c = IRConst_V128(V_BITS16_DEFINED); break; 4274 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 4275 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 4276 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 4277 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 4278 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 4279 } 4280 vdata = IRExpr_Const( c ); 4281 } 4282 4283 /* First, emit a definedness test for the address. This also sets 4284 the address (shadow) to 'defined' following the test. */ 4285 complainIfUndefined( mce, addr, guard ); 4286 4287 /* Now decide which helper function to call to write the data V 4288 bits into shadow memory. 
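   Added aside on the MC_(clo_mc_level) == 1 shortcut just above: 0 is the
   all-defined V-bit pattern, so the shadow data degenerates to a constant
   and the IR optimiser can discard whatever computed it.  Roughly, as a
   plain-C sketch (invented name; level 1 is, roughly, addressability
   checking only):

      #include <stdint.h>
      static uint32_t vdata_to_store(uint32_t computed_vdata, int mc_level)
      {
         return mc_level == 1 ? 0u : computed_vdata;   // 0 = all defined
      }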
*/ 4289 if (end == Iend_LE) { 4290 switch (ty) { 4291 case Ity_V256: /* we'll use the helper four times */ 4292 case Ity_V128: /* we'll use the helper twice */ 4293 case Ity_I64: helper = &MC_(helperc_STOREV64le); 4294 hname = "MC_(helperc_STOREV64le)"; 4295 break; 4296 case Ity_I32: helper = &MC_(helperc_STOREV32le); 4297 hname = "MC_(helperc_STOREV32le)"; 4298 break; 4299 case Ity_I16: helper = &MC_(helperc_STOREV16le); 4300 hname = "MC_(helperc_STOREV16le)"; 4301 break; 4302 case Ity_I8: helper = &MC_(helperc_STOREV8); 4303 hname = "MC_(helperc_STOREV8)"; 4304 break; 4305 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 4306 } 4307 } else { 4308 switch (ty) { 4309 case Ity_V128: /* we'll use the helper twice */ 4310 case Ity_I64: helper = &MC_(helperc_STOREV64be); 4311 hname = "MC_(helperc_STOREV64be)"; 4312 break; 4313 case Ity_I32: helper = &MC_(helperc_STOREV32be); 4314 hname = "MC_(helperc_STOREV32be)"; 4315 break; 4316 case Ity_I16: helper = &MC_(helperc_STOREV16be); 4317 hname = "MC_(helperc_STOREV16be)"; 4318 break; 4319 case Ity_I8: helper = &MC_(helperc_STOREV8); 4320 hname = "MC_(helperc_STOREV8)"; 4321 break; 4322 /* Note, no V256 case here, because no big-endian target that 4323 we support, has 256 vectors. */ 4324 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 4325 } 4326 } 4327 4328 if (UNLIKELY(ty == Ity_V256)) { 4329 4330 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with 4331 Q3 being the most significant lane. */ 4332 /* These are the offsets of the Qs in memory. */ 4333 Int offQ0, offQ1, offQ2, offQ3; 4334 4335 /* Various bits for constructing the 4 lane helper calls */ 4336 IRDirty *diQ0, *diQ1, *diQ2, *diQ3; 4337 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3; 4338 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3; 4339 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3; 4340 4341 if (end == Iend_LE) { 4342 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24; 4343 } else { 4344 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24; 4345 } 4346 4347 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0); 4348 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) ); 4349 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata)); 4350 diQ0 = unsafeIRDirty_0_N( 4351 1/*regparms*/, 4352 hname, VG_(fnptr_to_fnentry)( helper ), 4353 mkIRExprVec_2( addrQ0, vdataQ0 ) 4354 ); 4355 4356 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1); 4357 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) ); 4358 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata)); 4359 diQ1 = unsafeIRDirty_0_N( 4360 1/*regparms*/, 4361 hname, VG_(fnptr_to_fnentry)( helper ), 4362 mkIRExprVec_2( addrQ1, vdataQ1 ) 4363 ); 4364 4365 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2); 4366 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) ); 4367 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata)); 4368 diQ2 = unsafeIRDirty_0_N( 4369 1/*regparms*/, 4370 hname, VG_(fnptr_to_fnentry)( helper ), 4371 mkIRExprVec_2( addrQ2, vdataQ2 ) 4372 ); 4373 4374 eBiasQ3 = tyAddr==Ity_I32 ? 
mkU32(bias+offQ3) : mkU64(bias+offQ3); 4375 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) ); 4376 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata)); 4377 diQ3 = unsafeIRDirty_0_N( 4378 1/*regparms*/, 4379 hname, VG_(fnptr_to_fnentry)( helper ), 4380 mkIRExprVec_2( addrQ3, vdataQ3 ) 4381 ); 4382 4383 if (guard) 4384 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard; 4385 4386 setHelperAnns( mce, diQ0 ); 4387 setHelperAnns( mce, diQ1 ); 4388 setHelperAnns( mce, diQ2 ); 4389 setHelperAnns( mce, diQ3 ); 4390 stmt( 'V', mce, IRStmt_Dirty(diQ0) ); 4391 stmt( 'V', mce, IRStmt_Dirty(diQ1) ); 4392 stmt( 'V', mce, IRStmt_Dirty(diQ2) ); 4393 stmt( 'V', mce, IRStmt_Dirty(diQ3) ); 4394 4395 } 4396 else if (UNLIKELY(ty == Ity_V128)) { 4397 4398 /* V128-bit case */ 4399 /* See comment in next clause re 64-bit regparms */ 4400 /* also, need to be careful about endianness */ 4401 4402 Int offLo64, offHi64; 4403 IRDirty *diLo64, *diHi64; 4404 IRAtom *addrLo64, *addrHi64; 4405 IRAtom *vdataLo64, *vdataHi64; 4406 IRAtom *eBiasLo64, *eBiasHi64; 4407 4408 if (end == Iend_LE) { 4409 offLo64 = 0; 4410 offHi64 = 8; 4411 } else { 4412 offLo64 = 8; 4413 offHi64 = 0; 4414 } 4415 4416 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 4417 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 4418 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 4419 diLo64 = unsafeIRDirty_0_N( 4420 1/*regparms*/, 4421 hname, VG_(fnptr_to_fnentry)( helper ), 4422 mkIRExprVec_2( addrLo64, vdataLo64 ) 4423 ); 4424 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 4425 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 4426 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 4427 diHi64 = unsafeIRDirty_0_N( 4428 1/*regparms*/, 4429 hname, VG_(fnptr_to_fnentry)( helper ), 4430 mkIRExprVec_2( addrHi64, vdataHi64 ) 4431 ); 4432 if (guard) diLo64->guard = guard; 4433 if (guard) diHi64->guard = guard; 4434 setHelperAnns( mce, diLo64 ); 4435 setHelperAnns( mce, diHi64 ); 4436 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 4437 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 4438 4439 } else { 4440 4441 IRDirty *di; 4442 IRAtom *addrAct; 4443 4444 /* 8/16/32/64-bit cases */ 4445 /* Generate the actual address into addrAct. */ 4446 if (bias == 0) { 4447 addrAct = addr; 4448 } else { 4449 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 4450 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 4451 } 4452 4453 if (ty == Ity_I64) { 4454 /* We can't do this with regparm 2 on 32-bit platforms, since 4455 the back ends aren't clever enough to handle 64-bit 4456 regparm args. Therefore be different. */ 4457 di = unsafeIRDirty_0_N( 4458 1/*regparms*/, 4459 hname, VG_(fnptr_to_fnentry)( helper ), 4460 mkIRExprVec_2( addrAct, vdata ) 4461 ); 4462 } else { 4463 di = unsafeIRDirty_0_N( 4464 2/*regparms*/, 4465 hname, VG_(fnptr_to_fnentry)( helper ), 4466 mkIRExprVec_2( addrAct, 4467 zwidenToHostWord( mce, vdata )) 4468 ); 4469 } 4470 if (guard) di->guard = guard; 4471 setHelperAnns( mce, di ); 4472 stmt( 'V', mce, IRStmt_Dirty(di) ); 4473 } 4474 4475} 4476 4477 4478/* Do lazy pessimistic propagation through a dirty helper call, by 4479 looking at the annotations on it. This is the most complex part of 4480 Memcheck. 
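   Added illustration of what "lazy pessimistic" means here, as plain C
   over V bits (invented name, not IR): every input collapses to a single
   any-bit-undefined flag, the flags are OR-ed together, and every output
   is then painted wholly defined or wholly undefined from that one flag.

      #include <stdint.h>
      static uint64_t dirty_output_vbits(const uint32_t *input_vbits, int n_inputs)
      {
         int any_undef = 0;
         for (int i = 0; i < n_inputs; i++)
            any_undef |= (input_vbits[i] != 0);   // PCast each input, UifU them all
         return any_undef ? ~0ull : 0ull;         // PCast the summary onto an output
      }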
*/ 4481 4482static IRType szToITy ( Int n ) 4483{ 4484 switch (n) { 4485 case 1: return Ity_I8; 4486 case 2: return Ity_I16; 4487 case 4: return Ity_I32; 4488 case 8: return Ity_I64; 4489 default: VG_(tool_panic)("szToITy(memcheck)"); 4490 } 4491} 4492 4493static 4494void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 4495{ 4496 Int i, k, n, toDo, gSz, gOff; 4497 IRAtom *src, *here, *curr; 4498 IRType tySrc, tyDst; 4499 IRTemp dst; 4500 IREndness end; 4501 4502 /* What's the native endianness? We need to know this. */ 4503# if defined(VG_BIGENDIAN) 4504 end = Iend_BE; 4505# elif defined(VG_LITTLEENDIAN) 4506 end = Iend_LE; 4507# else 4508# error "Unknown endianness" 4509# endif 4510 4511 /* First check the guard. */ 4512 complainIfUndefined(mce, d->guard, NULL); 4513 4514 /* Now round up all inputs and PCast over them. */ 4515 curr = definedOfType(Ity_I32); 4516 4517 /* Inputs: unmasked args 4518 Note: arguments are evaluated REGARDLESS of the guard expression */ 4519 for (i = 0; d->args[i]; i++) { 4520 if (d->cee->mcx_mask & (1<<i)) { 4521 /* ignore this arg */ 4522 } else { 4523 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) ); 4524 curr = mkUifU32(mce, here, curr); 4525 } 4526 } 4527 4528 /* Inputs: guest state that we read. */ 4529 for (i = 0; i < d->nFxState; i++) { 4530 tl_assert(d->fxState[i].fx != Ifx_None); 4531 if (d->fxState[i].fx == Ifx_Write) 4532 continue; 4533 4534 /* Enumerate the described state segments */ 4535 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 4536 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 4537 gSz = d->fxState[i].size; 4538 4539 /* Ignore any sections marked as 'always defined'. */ 4540 if (isAlwaysDefd(mce, gOff, gSz)) { 4541 if (0) 4542 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 4543 gOff, gSz); 4544 continue; 4545 } 4546 4547 /* This state element is read or modified. So we need to 4548 consider it. If larger than 8 bytes, deal with it in 4549 8-byte chunks. */ 4550 while (True) { 4551 tl_assert(gSz >= 0); 4552 if (gSz == 0) break; 4553 n = gSz <= 8 ? gSz : 8; 4554 /* update 'curr' with UifU of the state slice 4555 gOff .. gOff+n-1 */ 4556 tySrc = szToITy( n ); 4557 4558 /* Observe the guard expression. If it is false use an 4559 all-bits-defined bit pattern */ 4560 IRAtom *cond, *iffalse, *iftrue; 4561 4562 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard)); 4563 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc)); 4564 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc)); 4565 src = assignNew('V', mce, tySrc, 4566 IRExpr_Mux0X(cond, iffalse, iftrue)); 4567 4568 here = mkPCastTo( mce, Ity_I32, src ); 4569 curr = mkUifU32(mce, here, curr); 4570 gSz -= n; 4571 gOff += n; 4572 } 4573 } 4574 } 4575 4576 /* Inputs: memory. First set up some info needed regardless of 4577 whether we're doing reads or writes. */ 4578 4579 if (d->mFx != Ifx_None) { 4580 /* Because we may do multiple shadow loads/stores from the same 4581 base address, it's best to do a single test of its 4582 definedness right now. Post-instrumentation optimisation 4583 should remove all but this test. 
*/ 4584 IRType tyAddr; 4585 tl_assert(d->mAddr); 4586 complainIfUndefined(mce, d->mAddr, d->guard); 4587 4588 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 4589 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 4590 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 4591 } 4592 4593 /* Deal with memory inputs (reads or modifies) */ 4594 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 4595 toDo = d->mSize; 4596 /* chew off 32-bit chunks. We don't care about the endianness 4597 since it's all going to be condensed down to a single bit, 4598 but nevertheless choose an endianness which is hopefully 4599 native to the platform. */ 4600 while (toDo >= 4) { 4601 here = mkPCastTo( 4602 mce, Ity_I32, 4603 expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr, 4604 d->mSize - toDo, d->guard ) 4605 ); 4606 curr = mkUifU32(mce, here, curr); 4607 toDo -= 4; 4608 } 4609 /* chew off 16-bit chunks */ 4610 while (toDo >= 2) { 4611 here = mkPCastTo( 4612 mce, Ity_I32, 4613 expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr, 4614 d->mSize - toDo, d->guard ) 4615 ); 4616 curr = mkUifU32(mce, here, curr); 4617 toDo -= 2; 4618 } 4619 /* chew off the remaining 8-bit chunk, if any */ 4620 if (toDo == 1) { 4621 here = mkPCastTo( 4622 mce, Ity_I32, 4623 expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr, 4624 d->mSize - toDo, d->guard ) 4625 ); 4626 curr = mkUifU32(mce, here, curr); 4627 toDo -= 1; 4628 } 4629 tl_assert(toDo == 0); 4630 } 4631 4632 /* Whew! So curr is a 32-bit V-value summarising pessimistically 4633 all the inputs to the helper. Now we need to re-distribute the 4634 results to all destinations. */ 4635 4636 /* Outputs: the destination temporary, if there is one. */ 4637 if (d->tmp != IRTemp_INVALID) { 4638 dst = findShadowTmpV(mce, d->tmp); 4639 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 4640 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 4641 } 4642 4643 /* Outputs: guest state that we write or modify. */ 4644 for (i = 0; i < d->nFxState; i++) { 4645 tl_assert(d->fxState[i].fx != Ifx_None); 4646 if (d->fxState[i].fx == Ifx_Read) 4647 continue; 4648 4649 /* Enumerate the described state segments */ 4650 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 4651 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 4652 gSz = d->fxState[i].size; 4653 4654 /* Ignore any sections marked as 'always defined'. */ 4655 if (isAlwaysDefd(mce, gOff, gSz)) 4656 continue; 4657 4658 /* This state element is written or modified. So we need to 4659 consider it. If larger than 8 bytes, deal with it in 4660 8-byte chunks. */ 4661 while (True) { 4662 tl_assert(gSz >= 0); 4663 if (gSz == 0) break; 4664 n = gSz <= 8 ? gSz : 8; 4665 /* Write suitably-casted 'curr' to the state slice 4666 gOff .. gOff+n-1 */ 4667 tyDst = szToITy( n ); 4668 do_shadow_PUT( mce, gOff, 4669 NULL, /* original atom */ 4670 mkPCastTo( mce, tyDst, curr ), d->guard ); 4671 gSz -= n; 4672 gOff += n; 4673 } 4674 } 4675 } 4676 4677 /* Outputs: memory that we write or modify. Same comments about 4678 endianness as above apply. 
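   Added illustration of the chunking used for the helper's memory inputs
   above and its memory outputs below (offsets and sizes only; the real
   loops emit one guarded shadow load or store per chunk; the function
   name is invented):

      #include <stdio.h>
      static void walk_mem_region(int mSize)
      {
         int toDo = mSize;
         while (toDo >= 4) { printf("32-bit chunk at +%d\n", mSize - toDo); toDo -= 4; }
         while (toDo >= 2) { printf("16-bit chunk at +%d\n", mSize - toDo); toDo -= 2; }
         if    (toDo == 1) { printf(" 8-bit chunk at +%d\n", mSize - toDo); }
      }
      // walk_mem_region(7) would report chunks at +0 (32-bit), +4 (16-bit), +6 (8-bit).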
*/ 4679 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 4680 toDo = d->mSize; 4681 /* chew off 32-bit chunks */ 4682 while (toDo >= 4) { 4683 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4684 NULL, /* original data */ 4685 mkPCastTo( mce, Ity_I32, curr ), 4686 d->guard ); 4687 toDo -= 4; 4688 } 4689 /* chew off 16-bit chunks */ 4690 while (toDo >= 2) { 4691 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4692 NULL, /* original data */ 4693 mkPCastTo( mce, Ity_I16, curr ), 4694 d->guard ); 4695 toDo -= 2; 4696 } 4697 /* chew off the remaining 8-bit chunk, if any */ 4698 if (toDo == 1) { 4699 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4700 NULL, /* original data */ 4701 mkPCastTo( mce, Ity_I8, curr ), 4702 d->guard ); 4703 toDo -= 1; 4704 } 4705 tl_assert(toDo == 0); 4706 } 4707 4708} 4709 4710 4711/* We have an ABI hint telling us that [base .. base+len-1] is to 4712 become undefined ("writable"). Generate code to call a helper to 4713 notify the A/V bit machinery of this fact. 4714 4715 We call 4716 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 4717 Addr nia ); 4718*/ 4719static 4720void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 4721{ 4722 IRDirty* di; 4723 /* Minor optimisation: if not doing origin tracking, ignore the 4724 supplied nia and pass zero instead. This is on the basis that 4725 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can 4726 almost always generate a shorter instruction to put zero into a 4727 register than any other value. */ 4728 if (MC_(clo_mc_level) < 3) 4729 nia = mkIRExpr_HWord(0); 4730 4731 di = unsafeIRDirty_0_N( 4732 0/*regparms*/, 4733 "MC_(helperc_MAKE_STACK_UNINIT)", 4734 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ), 4735 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 4736 ); 4737 stmt( 'V', mce, IRStmt_Dirty(di) ); 4738} 4739 4740 4741/* ------ Dealing with IRCAS (big and complex) ------ */ 4742 4743/* FWDS */ 4744static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 4745 IRAtom* baseaddr, Int offset ); 4746static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 4747static void gen_store_b ( MCEnv* mce, Int szB, 4748 IRAtom* baseaddr, Int offset, IRAtom* dataB, 4749 IRAtom* guard ); 4750 4751static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 4752static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 4753 4754 4755/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 4756 IRExpr.Consts, else this asserts. If they are both Consts, it 4757 doesn't do anything. So that just leaves the RdTmp case. 4758 4759 In which case: this assigns the shadow value SHADOW to the IR 4760 shadow temporary associated with ORIG. That is, ORIG, being an 4761 original temporary, will have a shadow temporary associated with 4762 it. However, in the case envisaged here, there will so far have 4763 been no IR emitted to actually write a shadow value into that 4764 temporary. What this routine does is to (emit IR to) copy the 4765 value in SHADOW into said temporary, so that after this call, 4766 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 4767 value in SHADOW. 4768 4769 Point is to allow callers to compute "by hand" a shadow value for 4770 ORIG, and force it to be associated with ORIG. 4771 4772 How do we know that that shadow associated with ORIG has not so far 4773 been assigned to? Well, we don't per se know that, but supposing 4774 it had. 
Then this routine would create a second assignment to it, 4775 and later the IR sanity checker would barf. But that never 4776 happens. QED. 4777*/ 4778static void bind_shadow_tmp_to_orig ( UChar how, 4779 MCEnv* mce, 4780 IRAtom* orig, IRAtom* shadow ) 4781{ 4782 tl_assert(isOriginalAtom(mce, orig)); 4783 tl_assert(isShadowAtom(mce, shadow)); 4784 switch (orig->tag) { 4785 case Iex_Const: 4786 tl_assert(shadow->tag == Iex_Const); 4787 break; 4788 case Iex_RdTmp: 4789 tl_assert(shadow->tag == Iex_RdTmp); 4790 if (how == 'V') { 4791 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 4792 shadow); 4793 } else { 4794 tl_assert(how == 'B'); 4795 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 4796 shadow); 4797 } 4798 break; 4799 default: 4800 tl_assert(0); 4801 } 4802} 4803 4804 4805static 4806void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 4807{ 4808 /* Scheme is (both single- and double- cases): 4809 4810 1. fetch data#,dataB (the proposed new value) 4811 4812 2. fetch expd#,expdB (what we expect to see at the address) 4813 4814 3. check definedness of address 4815 4816 4. load old#,oldB from shadow memory; this also checks 4817 addressibility of the address 4818 4819 5. the CAS itself 4820 4821 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below. 4822 4823 7. if "expected == old" (as computed by (6)) 4824 store data#,dataB to shadow memory 4825 4826 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores 4827 'data' but 7 stores 'data#'. Hence it is possible for the 4828 shadow data to be incorrectly checked and/or updated: 4829 4830 * 7 is at least gated correctly, since the 'expected == old' 4831 condition is derived from outputs of 5. However, the shadow 4832 write could happen too late: imagine after 5 we are 4833 descheduled, a different thread runs, writes a different 4834 (shadow) value at the address, and then we resume, hence 4835 overwriting the shadow value written by the other thread. 4836 4837 Because the original memory access is atomic, there's no way to 4838 make both the original and shadow accesses into a single atomic 4839 thing, hence this is unavoidable. 4840 4841 At least as Valgrind stands, I don't think it's a problem, since 4842 we're single threaded *and* we guarantee that there are no 4843 context switches during the execution of any specific superblock 4844 -- context switches can only happen at superblock boundaries. 4845 4846 If Valgrind ever becomes MT in the future, then it might be more 4847 of a problem. A possible kludge would be to artificially 4848 associate with the location, a lock, which we must acquire and 4849 release around the transaction as a whole. Hmm, that probably 4850 would't work properly since it only guards us against other 4851 threads doing CASs on the same location, not against other 4852 threads doing normal reads and writes. 4853 4854 ------------------------------------------------------------ 4855 4856 COMMENT_ON_CasCmpEQ: 4857 4858 Note two things. Firstly, in the sequence above, we compute 4859 "expected == old", but we don't check definedness of it. Why 4860 not? Also, the x86 and amd64 front ends use 4861 Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent 4862 determination (expected == old ?) for themselves, and we also 4863 don't check definedness for those primops; we just say that the 4864 result is defined. Why? Details follow. 
4865 4866 x86/amd64 contains various forms of locked insns: 4867 * lock prefix before all basic arithmetic insn; 4868 eg lock xorl %reg1,(%reg2) 4869 * atomic exchange reg-mem 4870 * compare-and-swaps 4871 4872 Rather than attempt to represent them all, which would be a 4873 royal PITA, I used a result from Maurice Herlihy 4874 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 4875 demonstrates that compare-and-swap is a primitive more general 4876 than the other two, and so can be used to represent all of them. 4877 So the translation scheme for (eg) lock incl (%reg) is as 4878 follows: 4879 4880 again: 4881 old = * %reg 4882 new = old + 1 4883 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 4884 4885 The "atomically" is the CAS bit. The scheme is always the same: 4886 get old value from memory, compute new value, atomically stuff 4887 new value back in memory iff the old value has not changed (iow, 4888 no other thread modified it in the meantime). If it has changed 4889 then we've been out-raced and we have to start over. 4890 4891 Now that's all very neat, but it has the bad side effect of 4892 introducing an explicit equality test into the translation. 4893 Consider the behaviour of said code on a memory location which 4894 is uninitialised. We will wind up doing a comparison on 4895 uninitialised data, and mc duly complains. 4896 4897 What's difficult about this is, the common case is that the 4898 location is uncontended, and so we're usually comparing the same 4899 value (* %reg) with itself. So we shouldn't complain even if it 4900 is undefined. But mc doesn't know that. 4901 4902 My solution is to mark the == in the IR specially, so as to tell 4903 mc that it almost certainly compares a value with itself, and we 4904 should just regard the result as always defined. Rather than 4905 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 4906 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 4907 4908 So there's always the question of, can this give a false 4909 negative? eg, imagine that initially, * %reg is defined; and we 4910 read that; but then in the gap between the read and the CAS, a 4911 different thread writes an undefined (and different) value at 4912 the location. Then the CAS in this thread will fail and we will 4913 go back to "again:", but without knowing that the trip back 4914 there was based on an undefined comparison. No matter; at least 4915 the other thread won the race and the location is correctly 4916 marked as undefined. What if it wrote an uninitialised version 4917 of the same value that was there originally, though? 4918 4919 etc etc. Seems like there's a small corner case in which we 4920 might lose the fact that something's defined -- we're out-raced 4921 in between the "old = * reg" and the "atomically {", _and_ the 4922 other thread is writing in an undefined version of what's 4923 already there. Well, that seems pretty unlikely. 4924 4925 --- 4926 4927 If we ever need to reinstate it .. code which generates a 4928 definedness test for "expected == old" was removed at r10432 of 4929 this file. 
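   For concreteness, here is the lock incl scheme above rendered as
   compilable C11.  This is an added illustration of the comment, not
   anything the front ends actually emit.

      #include <stdatomic.h>
      static void lock_incl_model(_Atomic int *p)
      {
         int old, newv;
         do {
            old  = atomic_load(p);   // "old = * %reg"  -- may read uninitialised memory
            newv = old + 1;          // "new = old + 1"
            // the CAS below succeeds only if *p still equals old; in the
            // common uncontended case that compares a value with itself,
            // which is exactly the comparison CasCmpEQ tells mc to treat
            // as defined
         } while (!atomic_compare_exchange_strong(p, &old, newv));
      }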
4930 */ 4931 if (cas->oldHi == IRTemp_INVALID) { 4932 do_shadow_CAS_single( mce, cas ); 4933 } else { 4934 do_shadow_CAS_double( mce, cas ); 4935 } 4936} 4937 4938 4939static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 4940{ 4941 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4942 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4943 IRAtom *voldLo = NULL, *boldLo = NULL; 4944 IRAtom *expd_eq_old = NULL; 4945 IROp opCasCmpEQ; 4946 Int elemSzB; 4947 IRType elemTy; 4948 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4949 4950 /* single CAS */ 4951 tl_assert(cas->oldHi == IRTemp_INVALID); 4952 tl_assert(cas->expdHi == NULL); 4953 tl_assert(cas->dataHi == NULL); 4954 4955 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4956 switch (elemTy) { 4957 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 4958 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 4959 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 4960 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 4961 default: tl_assert(0); /* IR defn disallows any other types */ 4962 } 4963 4964 /* 1. fetch data# (the proposed new value) */ 4965 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4966 vdataLo 4967 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4968 tl_assert(isShadowAtom(mce, vdataLo)); 4969 if (otrak) { 4970 bdataLo 4971 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4972 tl_assert(isShadowAtom(mce, bdataLo)); 4973 } 4974 4975 /* 2. fetch expected# (what we expect to see at the address) */ 4976 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4977 vexpdLo 4978 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4979 tl_assert(isShadowAtom(mce, vexpdLo)); 4980 if (otrak) { 4981 bexpdLo 4982 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4983 tl_assert(isShadowAtom(mce, bexpdLo)); 4984 } 4985 4986 /* 3. check definedness of address */ 4987 /* 4. fetch old# from shadow memory; this also checks 4988 addressibility of the address */ 4989 voldLo 4990 = assignNew( 4991 'V', mce, elemTy, 4992 expr2vbits_Load( 4993 mce, 4994 cas->end, elemTy, cas->addr, 0/*Addr bias*/ 4995 )); 4996 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4997 if (otrak) { 4998 boldLo 4999 = assignNew('B', mce, Ity_I32, 5000 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 5001 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5002 } 5003 5004 /* 5. the CAS itself */ 5005 stmt( 'C', mce, IRStmt_CAS(cas) ); 5006 5007 /* 6. compute "expected == old" */ 5008 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5009 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5010 tree, but it's not copied from the input block. */ 5011 expd_eq_old 5012 = assignNew('C', mce, Ity_I1, 5013 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 5014 5015 /* 7. 
if "expected == old" 5016 store data# to shadow memory */ 5017 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 5018 NULL/*data*/, vdataLo/*vdata*/, 5019 expd_eq_old/*guard for store*/ ); 5020 if (otrak) { 5021 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 5022 bdataLo/*bdata*/, 5023 expd_eq_old/*guard for store*/ ); 5024 } 5025} 5026 5027 5028static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 5029{ 5030 IRAtom *vdataHi = NULL, *bdataHi = NULL; 5031 IRAtom *vdataLo = NULL, *bdataLo = NULL; 5032 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 5033 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 5034 IRAtom *voldHi = NULL, *boldHi = NULL; 5035 IRAtom *voldLo = NULL, *boldLo = NULL; 5036 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 5037 IRAtom *expd_eq_old = NULL, *zero = NULL; 5038 IROp opCasCmpEQ, opOr, opXor; 5039 Int elemSzB, memOffsLo, memOffsHi; 5040 IRType elemTy; 5041 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 5042 5043 /* double CAS */ 5044 tl_assert(cas->oldHi != IRTemp_INVALID); 5045 tl_assert(cas->expdHi != NULL); 5046 tl_assert(cas->dataHi != NULL); 5047 5048 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 5049 switch (elemTy) { 5050 case Ity_I8: 5051 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 5052 elemSzB = 1; zero = mkU8(0); 5053 break; 5054 case Ity_I16: 5055 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 5056 elemSzB = 2; zero = mkU16(0); 5057 break; 5058 case Ity_I32: 5059 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 5060 elemSzB = 4; zero = mkU32(0); 5061 break; 5062 case Ity_I64: 5063 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 5064 elemSzB = 8; zero = mkU64(0); 5065 break; 5066 default: 5067 tl_assert(0); /* IR defn disallows any other types */ 5068 } 5069 5070 /* 1. fetch data# (the proposed new value) */ 5071 tl_assert(isOriginalAtom(mce, cas->dataHi)); 5072 tl_assert(isOriginalAtom(mce, cas->dataLo)); 5073 vdataHi 5074 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 5075 vdataLo 5076 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 5077 tl_assert(isShadowAtom(mce, vdataHi)); 5078 tl_assert(isShadowAtom(mce, vdataLo)); 5079 if (otrak) { 5080 bdataHi 5081 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 5082 bdataLo 5083 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 5084 tl_assert(isShadowAtom(mce, bdataHi)); 5085 tl_assert(isShadowAtom(mce, bdataLo)); 5086 } 5087 5088 /* 2. fetch expected# (what we expect to see at the address) */ 5089 tl_assert(isOriginalAtom(mce, cas->expdHi)); 5090 tl_assert(isOriginalAtom(mce, cas->expdLo)); 5091 vexpdHi 5092 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 5093 vexpdLo 5094 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5095 tl_assert(isShadowAtom(mce, vexpdHi)); 5096 tl_assert(isShadowAtom(mce, vexpdLo)); 5097 if (otrak) { 5098 bexpdHi 5099 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 5100 bexpdLo 5101 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5102 tl_assert(isShadowAtom(mce, bexpdHi)); 5103 tl_assert(isShadowAtom(mce, bexpdLo)); 5104 } 5105 5106 /* 3. check definedness of address */ 5107 /* 4. 
fetch old# from shadow memory; this also checks 5108 addressibility of the address */ 5109 if (cas->end == Iend_LE) { 5110 memOffsLo = 0; 5111 memOffsHi = elemSzB; 5112 } else { 5113 tl_assert(cas->end == Iend_BE); 5114 memOffsLo = elemSzB; 5115 memOffsHi = 0; 5116 } 5117 voldHi 5118 = assignNew( 5119 'V', mce, elemTy, 5120 expr2vbits_Load( 5121 mce, 5122 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/ 5123 )); 5124 voldLo 5125 = assignNew( 5126 'V', mce, elemTy, 5127 expr2vbits_Load( 5128 mce, 5129 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/ 5130 )); 5131 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 5132 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 5133 if (otrak) { 5134 boldHi 5135 = assignNew('B', mce, Ity_I32, 5136 gen_load_b(mce, elemSzB, cas->addr, 5137 memOffsHi/*addr bias*/)); 5138 boldLo 5139 = assignNew('B', mce, Ity_I32, 5140 gen_load_b(mce, elemSzB, cas->addr, 5141 memOffsLo/*addr bias*/)); 5142 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 5143 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5144 } 5145 5146 /* 5. the CAS itself */ 5147 stmt( 'C', mce, IRStmt_CAS(cas) ); 5148 5149 /* 6. compute "expected == old" */ 5150 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5151 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5152 tree, but it's not copied from the input block. */ 5153 /* 5154 xHi = oldHi ^ expdHi; 5155 xLo = oldLo ^ expdLo; 5156 xHL = xHi | xLo; 5157 expd_eq_old = xHL == 0; 5158 */ 5159 xHi = assignNew('C', mce, elemTy, 5160 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 5161 xLo = assignNew('C', mce, elemTy, 5162 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 5163 xHL = assignNew('C', mce, elemTy, 5164 binop(opOr, xHi, xLo)); 5165 expd_eq_old 5166 = assignNew('C', mce, Ity_I1, 5167 binop(opCasCmpEQ, xHL, zero)); 5168 5169 /* 7. if "expected == old" 5170 store data# to shadow memory */ 5171 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 5172 NULL/*data*/, vdataHi/*vdata*/, 5173 expd_eq_old/*guard for store*/ ); 5174 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 5175 NULL/*data*/, vdataLo/*vdata*/, 5176 expd_eq_old/*guard for store*/ ); 5177 if (otrak) { 5178 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 5179 bdataHi/*bdata*/, 5180 expd_eq_old/*guard for store*/ ); 5181 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 5182 bdataLo/*bdata*/, 5183 expd_eq_old/*guard for store*/ ); 5184 } 5185} 5186 5187 5188/* ------ Dealing with LL/SC (not difficult) ------ */ 5189 5190static void do_shadow_LLSC ( MCEnv* mce, 5191 IREndness stEnd, 5192 IRTemp stResult, 5193 IRExpr* stAddr, 5194 IRExpr* stStoredata ) 5195{ 5196 /* In short: treat a load-linked like a normal load followed by an 5197 assignment of the loaded (shadow) data to the result temporary. 5198 Treat a store-conditional like a normal store, and mark the 5199 result temporary as defined. */ 5200 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 5201 IRTemp resTmp = findShadowTmpV(mce, stResult); 5202 5203 tl_assert(isIRAtom(stAddr)); 5204 if (stStoredata) 5205 tl_assert(isIRAtom(stStoredata)); 5206 5207 if (stStoredata == NULL) { 5208 /* Load Linked */ 5209 /* Just treat this as a normal load, followed by an assignment of 5210 the value to .result. 
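   Added aside, referring back to do_shadow_CAS_double above: the xHi/xLo
   computation there is just pairwise equality folded down to the single
   1-bit guard both stores need.  In plain C (made-up name):

      #include <stdint.h>
      static int dcas_expected_eq_old(uint64_t expdHi, uint64_t expdLo,
                                      uint64_t oldHi,  uint64_t oldLo)
      {
         // XOR is zero exactly where the halves agree, so the OR of the
         // two XORs is zero iff both halves match
         return ((expdHi ^ oldHi) | (expdLo ^ oldLo)) == 0;
      }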
*/ 5211 /* Stay sane */ 5212 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 5213 || resTy == Ity_I16 || resTy == Ity_I8); 5214 assign( 'V', mce, resTmp, 5215 expr2vbits_Load( 5216 mce, stEnd, resTy, stAddr, 0/*addr bias*/)); 5217 } else { 5218 /* Store Conditional */ 5219 /* Stay sane */ 5220 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 5221 stStoredata); 5222 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 5223 || dataTy == Ity_I16 || dataTy == Ity_I8); 5224 do_shadow_Store( mce, stEnd, 5225 stAddr, 0/* addr bias */, 5226 stStoredata, 5227 NULL /* shadow data */, 5228 NULL/*guard*/ ); 5229 /* This is a store conditional, so it writes to .result a value 5230 indicating whether or not the store succeeded. Just claim 5231 this value is always defined. In the PowerPC interpretation 5232 of store-conditional, definedness of the success indication 5233 depends on whether the address of the store matches the 5234 reservation address. But we can't tell that here (and 5235 anyway, we're not being PowerPC-specific). At least we are 5236 guaranteed that the definedness of the store address, and its 5237 addressibility, will be checked as per normal. So it seems 5238 pretty safe to just say that the success indication is always 5239 defined. 5240 5241 In schemeS, for origin tracking, we must correspondingly set 5242 a no-origin value for the origin shadow of .result. 5243 */ 5244 tl_assert(resTy == Ity_I1); 5245 assign( 'V', mce, resTmp, definedOfType(resTy) ); 5246 } 5247} 5248 5249 5250/*------------------------------------------------------------*/ 5251/*--- Memcheck main ---*/ 5252/*------------------------------------------------------------*/ 5253 5254static void schemeS ( MCEnv* mce, IRStmt* st ); 5255 5256static Bool isBogusAtom ( IRAtom* at ) 5257{ 5258 ULong n = 0; 5259 IRConst* con; 5260 tl_assert(isIRAtom(at)); 5261 if (at->tag == Iex_RdTmp) 5262 return False; 5263 tl_assert(at->tag == Iex_Const); 5264 con = at->Iex.Const.con; 5265 switch (con->tag) { 5266 case Ico_U1: return False; 5267 case Ico_U8: n = (ULong)con->Ico.U8; break; 5268 case Ico_U16: n = (ULong)con->Ico.U16; break; 5269 case Ico_U32: n = (ULong)con->Ico.U32; break; 5270 case Ico_U64: n = (ULong)con->Ico.U64; break; 5271 case Ico_F64: return False; 5272 case Ico_F32i: return False; 5273 case Ico_F64i: return False; 5274 case Ico_V128: return False; 5275 default: ppIRExpr(at); tl_assert(0); 5276 } 5277 /* VG_(printf)("%llx\n", n); */ 5278 return (/*32*/ n == 0xFEFEFEFFULL 5279 /*32*/ || n == 0x80808080ULL 5280 /*32*/ || n == 0x7F7F7F7FULL 5281 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 5282 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 5283 /*64*/ || n == 0x0000000000008080ULL 5284 /*64*/ || n == 0x8080808080808080ULL 5285 /*64*/ || n == 0x0101010101010101ULL 5286 ); 5287} 5288 5289static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 5290{ 5291 Int i; 5292 IRExpr* e; 5293 IRDirty* d; 5294 IRCAS* cas; 5295 switch (st->tag) { 5296 case Ist_WrTmp: 5297 e = st->Ist.WrTmp.data; 5298 switch (e->tag) { 5299 case Iex_Get: 5300 case Iex_RdTmp: 5301 return False; 5302 case Iex_Const: 5303 return isBogusAtom(e); 5304 case Iex_Unop: 5305 return isBogusAtom(e->Iex.Unop.arg); 5306 case Iex_GetI: 5307 return isBogusAtom(e->Iex.GetI.ix); 5308 case Iex_Binop: 5309 return isBogusAtom(e->Iex.Binop.arg1) 5310 || isBogusAtom(e->Iex.Binop.arg2); 5311 case Iex_Triop: 5312 return isBogusAtom(e->Iex.Triop.details->arg1) 5313 || isBogusAtom(e->Iex.Triop.details->arg2) 5314 || isBogusAtom(e->Iex.Triop.details->arg3); 5315 case Iex_Qop: 5316 return 
isBogusAtom(e->Iex.Qop.details->arg1) 5317 || isBogusAtom(e->Iex.Qop.details->arg2) 5318 || isBogusAtom(e->Iex.Qop.details->arg3) 5319 || isBogusAtom(e->Iex.Qop.details->arg4); 5320 case Iex_Mux0X: 5321 return isBogusAtom(e->Iex.Mux0X.cond) 5322 || isBogusAtom(e->Iex.Mux0X.expr0) 5323 || isBogusAtom(e->Iex.Mux0X.exprX); 5324 case Iex_Load: 5325 return isBogusAtom(e->Iex.Load.addr); 5326 case Iex_CCall: 5327 for (i = 0; e->Iex.CCall.args[i]; i++) 5328 if (isBogusAtom(e->Iex.CCall.args[i])) 5329 return True; 5330 return False; 5331 default: 5332 goto unhandled; 5333 } 5334 case Ist_Dirty: 5335 d = st->Ist.Dirty.details; 5336 for (i = 0; d->args[i]; i++) 5337 if (isBogusAtom(d->args[i])) 5338 return True; 5339 if (d->guard && isBogusAtom(d->guard)) 5340 return True; 5341 if (d->mAddr && isBogusAtom(d->mAddr)) 5342 return True; 5343 return False; 5344 case Ist_Put: 5345 return isBogusAtom(st->Ist.Put.data); 5346 case Ist_PutI: 5347 return isBogusAtom(st->Ist.PutI.details->ix) 5348 || isBogusAtom(st->Ist.PutI.details->data); 5349 case Ist_Store: 5350 return isBogusAtom(st->Ist.Store.addr) 5351 || isBogusAtom(st->Ist.Store.data); 5352 case Ist_Exit: 5353 return isBogusAtom(st->Ist.Exit.guard); 5354 case Ist_AbiHint: 5355 return isBogusAtom(st->Ist.AbiHint.base) 5356 || isBogusAtom(st->Ist.AbiHint.nia); 5357 case Ist_NoOp: 5358 case Ist_IMark: 5359 case Ist_MBE: 5360 return False; 5361 case Ist_CAS: 5362 cas = st->Ist.CAS.details; 5363 return isBogusAtom(cas->addr) 5364 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 5365 || isBogusAtom(cas->expdLo) 5366 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 5367 || isBogusAtom(cas->dataLo); 5368 case Ist_LLSC: 5369 return isBogusAtom(st->Ist.LLSC.addr) 5370 || (st->Ist.LLSC.storedata 5371 ? isBogusAtom(st->Ist.LLSC.storedata) 5372 : False); 5373 default: 5374 unhandled: 5375 ppIRStmt(st); 5376 VG_(tool_panic)("hasBogusLiterals"); 5377 } 5378} 5379 5380 5381IRSB* MC_(instrument) ( VgCallbackClosure* closure, 5382 IRSB* sb_in, 5383 VexGuestLayout* layout, 5384 VexGuestExtents* vge, 5385 VexArchInfo* archinfo_host, 5386 IRType gWordTy, IRType hWordTy ) 5387{ 5388 Bool verboze = 0||False; 5389 Bool bogus; 5390 Int i, j, first_stmt; 5391 IRStmt* st; 5392 MCEnv mce; 5393 IRSB* sb_out; 5394 5395 if (gWordTy != hWordTy) { 5396 /* We don't currently support this case. */ 5397 VG_(tool_panic)("host/guest word size mismatch"); 5398 } 5399 5400 /* Check we're not completely nuts */ 5401 tl_assert(sizeof(UWord) == sizeof(void*)); 5402 tl_assert(sizeof(Word) == sizeof(void*)); 5403 tl_assert(sizeof(Addr) == sizeof(void*)); 5404 tl_assert(sizeof(ULong) == 8); 5405 tl_assert(sizeof(Long) == 8); 5406 tl_assert(sizeof(Addr64) == 8); 5407 tl_assert(sizeof(UInt) == 4); 5408 tl_assert(sizeof(Int) == 4); 5409 5410 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 5411 5412 /* Set up SB */ 5413 sb_out = deepCopyIRSBExceptStmts(sb_in); 5414 5415 /* Set up the running environment. Both .sb and .tmpMap are 5416 modified as we go along. Note that tmps are added to both 5417 .sb->tyenv and .tmpMap together, so the valid index-set for 5418 those two arrays should always be identical. */ 5419 VG_(memset)(&mce, 0, sizeof(mce)); 5420 mce.sb = sb_out; 5421 mce.trace = verboze; 5422 mce.layout = layout; 5423 mce.hWordTy = hWordTy; 5424 mce.bogusLiterals = False; 5425 5426 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on 5427 Darwin. 
10.7 is mostly built with LLVM, which uses these for 5428 bitfield inserts, and we get a lot of false errors if the cheap 5429 interpretation is used, alas. Could solve this much better if 5430 we knew which of such adds came from x86/amd64 LEA instructions, 5431 since these are the only ones really needing the expensive 5432 interpretation, but that would require some way to tag them in 5433 the _toIR.c front ends, which is a lot of faffing around. So 5434 for now just use the slow and blunt-instrument solution. */ 5435 mce.useLLVMworkarounds = False; 5436# if defined(VGO_darwin) 5437 mce.useLLVMworkarounds = True; 5438# endif 5439 5440 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 5441 sizeof(TempMapEnt)); 5442 for (i = 0; i < sb_in->tyenv->types_used; i++) { 5443 TempMapEnt ent; 5444 ent.kind = Orig; 5445 ent.shadowV = IRTemp_INVALID; 5446 ent.shadowB = IRTemp_INVALID; 5447 VG_(addToXA)( mce.tmpMap, &ent ); 5448 } 5449 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 5450 5451 /* Make a preliminary inspection of the statements, to see if there 5452 are any dodgy-looking literals. If there are, we generate 5453 extra-detailed (hence extra-expensive) instrumentation in 5454 places. Scan the whole bb even if dodgyness is found earlier, 5455 so that the flatness assertion is applied to all stmts. */ 5456 5457 bogus = False; 5458 5459 for (i = 0; i < sb_in->stmts_used; i++) { 5460 5461 st = sb_in->stmts[i]; 5462 tl_assert(st); 5463 tl_assert(isFlatIRStmt(st)); 5464 5465 if (!bogus) { 5466 bogus = checkForBogusLiterals(st); 5467 if (0 && bogus) { 5468 VG_(printf)("bogus: "); 5469 ppIRStmt(st); 5470 VG_(printf)("\n"); 5471 } 5472 } 5473 5474 } 5475 5476 mce.bogusLiterals = bogus; 5477 5478 /* Copy verbatim any IR preamble preceding the first IMark */ 5479 5480 tl_assert(mce.sb == sb_out); 5481 tl_assert(mce.sb != sb_in); 5482 5483 i = 0; 5484 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 5485 5486 st = sb_in->stmts[i]; 5487 tl_assert(st); 5488 tl_assert(isFlatIRStmt(st)); 5489 5490 stmt( 'C', &mce, sb_in->stmts[i] ); 5491 i++; 5492 } 5493 5494 /* Nasty problem. IR optimisation of the pre-instrumented IR may 5495 cause the IR following the preamble to contain references to IR 5496 temporaries defined in the preamble. Because the preamble isn't 5497 instrumented, these temporaries don't have any shadows. 5498 Nevertheless uses of them following the preamble will cause 5499 memcheck to generate references to their shadows. End effect is 5500 to cause IR sanity check failures, due to references to 5501 non-existent shadows. This is only evident for the complex 5502 preambles used for function wrapping on TOC-afflicted platforms 5503 (ppc64-linux). 5504 5505 The following loop therefore scans the preamble looking for 5506 assignments to temporaries. For each one found it creates an 5507 assignment to the corresponding (V) shadow temp, marking it as 5508 'defined'. This is the same resulting IR as if the main 5509 instrumentation loop before had been applied to the statement 5510 'tmp = CONSTANT'. 5511 5512 Similarly, if origin tracking is enabled, we must generate an 5513 assignment for the corresponding origin (B) shadow, claiming 5514 no-origin, as appropriate for a defined value. 5515 */ 5516 for (j = 0; j < i; j++) { 5517 if (sb_in->stmts[j]->tag == Ist_WrTmp) { 5518 /* findShadowTmpV checks its arg is an original tmp; 5519 no need to assert that here. 
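         As an illustrative sketch only (the temp names here are made
         up): if the preamble contains

            t5 = GET:I64(16)

         then this loop emits

            t5v = <all-defined V bits of type I64>     (via definedOfType)

         and additionally, when MC_(clo_mc_level) == 3,

            t5b = 0x0:I32                              (no origin)

         which is the same IR the main instrumentation loop would have
         produced for 't5 = CONSTANT'.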
*/ 5520 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 5521 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 5522 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 5523 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 5524 if (MC_(clo_mc_level) == 3) { 5525 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 5526 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 5527 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 5528 } 5529 if (0) { 5530 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 5531 ppIRType( ty_v ); 5532 VG_(printf)("\n"); 5533 } 5534 } 5535 } 5536 5537 /* Iterate over the remaining stmts to generate instrumentation. */ 5538 5539 tl_assert(sb_in->stmts_used > 0); 5540 tl_assert(i >= 0); 5541 tl_assert(i < sb_in->stmts_used); 5542 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 5543 5544 for (/* use current i*/; i < sb_in->stmts_used; i++) { 5545 5546 st = sb_in->stmts[i]; 5547 first_stmt = sb_out->stmts_used; 5548 5549 if (verboze) { 5550 VG_(printf)("\n"); 5551 ppIRStmt(st); 5552 VG_(printf)("\n"); 5553 } 5554 5555 if (MC_(clo_mc_level) == 3) { 5556 /* See comments on case Ist_CAS below. */ 5557 if (st->tag != Ist_CAS) 5558 schemeS( &mce, st ); 5559 } 5560 5561 /* Generate instrumentation code for each stmt ... */ 5562 5563 switch (st->tag) { 5564 5565 case Ist_WrTmp: 5566 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 5567 expr2vbits( &mce, st->Ist.WrTmp.data) ); 5568 break; 5569 5570 case Ist_Put: 5571 do_shadow_PUT( &mce, 5572 st->Ist.Put.offset, 5573 st->Ist.Put.data, 5574 NULL /* shadow atom */, NULL /* guard */ ); 5575 break; 5576 5577 case Ist_PutI: 5578 do_shadow_PUTI( &mce, st->Ist.PutI.details); 5579 break; 5580 5581 case Ist_Store: 5582 do_shadow_Store( &mce, st->Ist.Store.end, 5583 st->Ist.Store.addr, 0/* addr bias */, 5584 st->Ist.Store.data, 5585 NULL /* shadow data */, 5586 NULL/*guard*/ ); 5587 break; 5588 5589 case Ist_Exit: 5590 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); 5591 break; 5592 5593 case Ist_IMark: 5594 break; 5595 5596 case Ist_NoOp: 5597 case Ist_MBE: 5598 break; 5599 5600 case Ist_Dirty: 5601 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 5602 break; 5603 5604 case Ist_AbiHint: 5605 do_AbiHint( &mce, st->Ist.AbiHint.base, 5606 st->Ist.AbiHint.len, 5607 st->Ist.AbiHint.nia ); 5608 break; 5609 5610 case Ist_CAS: 5611 do_shadow_CAS( &mce, st->Ist.CAS.details ); 5612 /* Note, do_shadow_CAS copies the CAS itself to the output 5613 block, because it needs to add instrumentation both 5614 before and after it. Hence skip the copy below. Also 5615 skip the origin-tracking stuff (call to schemeS) above, 5616 since that's all tangled up with it too; do_shadow_CAS 5617 does it all. */ 5618 break; 5619 5620 case Ist_LLSC: 5621 do_shadow_LLSC( &mce, 5622 st->Ist.LLSC.end, 5623 st->Ist.LLSC.result, 5624 st->Ist.LLSC.addr, 5625 st->Ist.LLSC.storedata ); 5626 break; 5627 5628 default: 5629 VG_(printf)("\n"); 5630 ppIRStmt(st); 5631 VG_(printf)("\n"); 5632 VG_(tool_panic)("memcheck: unhandled IRStmt"); 5633 5634 } /* switch (st->tag) */ 5635 5636 if (0 && verboze) { 5637 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5638 VG_(printf)(" "); 5639 ppIRStmt(sb_out->stmts[j]); 5640 VG_(printf)("\n"); 5641 } 5642 VG_(printf)("\n"); 5643 } 5644 5645 /* ... and finally copy the stmt itself to the output. Except, 5646 skip the copy of IRCASs; see comments on case Ist_CAS 5647 above. */ 5648 if (st->tag != Ist_CAS) 5649 stmt('C', &mce, st); 5650 } 5651 5652 /* Now we need to complain if the jump target is undefined. 
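      Roughly what gets emitted here, as a sketch with made-up temp
      names (the real statements come from complainIfUndefined): for a
      64-bit jump target whose V-bit shadow is t_next_v, and with
      origin tracking off, the check boils down to

         t_bad = CmpNEZ64(t_next_v)    -- nonzero iff any bit of the
                                       -- target is undefined
         DIRTY t_bad ::: MC_(helperc_value_check8_fail_no_o)()

      i.e. a guarded call to one of the helpers recognised by
      is_helperc_value_checkN_fail further down.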
*/ 5653 first_stmt = sb_out->stmts_used; 5654 5655 if (verboze) { 5656 VG_(printf)("sb_in->next = "); 5657 ppIRExpr(sb_in->next); 5658 VG_(printf)("\n\n"); 5659 } 5660 5661 complainIfUndefined( &mce, sb_in->next, NULL ); 5662 5663 if (0 && verboze) { 5664 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5665 VG_(printf)(" "); 5666 ppIRStmt(sb_out->stmts[j]); 5667 VG_(printf)("\n"); 5668 } 5669 VG_(printf)("\n"); 5670 } 5671 5672 /* If this fails, there's been some serious snafu with tmp management, 5673 that should be investigated. */ 5674 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 5675 VG_(deleteXA)( mce.tmpMap ); 5676 5677 tl_assert(mce.sb == sb_out); 5678 return sb_out; 5679} 5680 5681/*------------------------------------------------------------*/ 5682/*--- Post-tree-build final tidying ---*/ 5683/*------------------------------------------------------------*/ 5684 5685/* This exploits the observation that Memcheck often produces 5686 repeated conditional calls of the form 5687 5688 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 5689 5690 with the same guard expression G guarding the same helper call. 5691 The second and subsequent calls are redundant. This usually 5692 results from instrumentation of guest code containing multiple 5693 memory references at different constant offsets from the same base 5694 register. After optimisation of the instrumentation, you get a 5695 test for the definedness of the base register for each memory 5696 reference, which is kinda pointless. MC_(final_tidy) therefore 5697 looks for such repeated calls and removes all but the first. */ 5698 5699/* A struct for recording which (helper, guard) pairs we have already 5700 seen. */ 5701typedef 5702 struct { void* entry; IRExpr* guard; } 5703 Pair; 5704 5705/* Return True if e1 and e2 definitely denote the same value (used to 5706 compare guards). Return False if unknown; False is the safe 5707 answer. Since guest registers and guest memory do not have the 5708 SSA property we must return False if any Gets or Loads appear in 5709 the expression. */ 5710 5711static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 5712{ 5713 if (e1->tag != e2->tag) 5714 return False; 5715 switch (e1->tag) { 5716 case Iex_Const: 5717 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 5718 case Iex_Binop: 5719 return e1->Iex.Binop.op == e2->Iex.Binop.op 5720 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 5721 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 5722 case Iex_Unop: 5723 return e1->Iex.Unop.op == e2->Iex.Unop.op 5724 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 5725 case Iex_RdTmp: 5726 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 5727 case Iex_Mux0X: 5728 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond ) 5729 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 ) 5730 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX ); 5731 case Iex_Qop: 5732 case Iex_Triop: 5733 case Iex_CCall: 5734 /* be lazy. Could define equality for these, but they never 5735 appear to be used. 
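         (Being lazy is safe: returning False only means a redundant
         helperc_value_checkN_fail call survives MC_(final_tidy); it
         can never cause a needed check to be deleted.  If CCall
         equality were ever wanted, a sketch of it would be

            e1->Iex.CCall.cee->addr == e2->Iex.CCall.cee->addr
            && <sameIRValue pairwise over the two args vectors>

         by analogy with the Binop case above.)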
*/ 5736 return False; 5737 case Iex_Get: 5738 case Iex_GetI: 5739 case Iex_Load: 5740 /* be conservative - these may not give the same value each 5741 time */ 5742 return False; 5743 case Iex_Binder: 5744 /* should never see this */ 5745 /* fallthrough */ 5746 default: 5747 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 5748 ppIRExpr(e1); 5749 VG_(tool_panic)("memcheck:sameIRValue"); 5750 return False; 5751 } 5752} 5753 5754/* See if 'pairs' already has an entry for (entry, guard). Return 5755 True if so. If not, add an entry. */ 5756 5757static 5758Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 5759{ 5760 Pair p; 5761 Pair* pp; 5762 Int i, n = VG_(sizeXA)( pairs ); 5763 for (i = 0; i < n; i++) { 5764 pp = VG_(indexXA)( pairs, i ); 5765 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 5766 return True; 5767 } 5768 p.guard = guard; 5769 p.entry = entry; 5770 VG_(addToXA)( pairs, &p ); 5771 return False; 5772} 5773 5774static Bool is_helperc_value_checkN_fail ( const HChar* name ) 5775{ 5776 return 5777 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 5778 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 5779 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 5780 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 5781 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 5782 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 5783 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 5784 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 5785} 5786 5787IRSB* MC_(final_tidy) ( IRSB* sb_in ) 5788{ 5789 Int i; 5790 IRStmt* st; 5791 IRDirty* di; 5792 IRExpr* guard; 5793 IRCallee* cee; 5794 Bool alreadyPresent; 5795 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 5796 VG_(free), sizeof(Pair) ); 5797 /* Scan forwards through the statements. Each time a call to one 5798 of the relevant helpers is seen, check if we have made a 5799 previous call to the same helper using the same guard 5800 expression, and if so, delete the call. */ 5801 for (i = 0; i < sb_in->stmts_used; i++) { 5802 st = sb_in->stmts[i]; 5803 tl_assert(st); 5804 if (st->tag != Ist_Dirty) 5805 continue; 5806 di = st->Ist.Dirty.details; 5807 guard = di->guard; 5808 if (!guard) 5809 continue; 5810 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 5811 cee = di->cee; 5812 if (!is_helperc_value_checkN_fail( cee->name )) 5813 continue; 5814 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 5815 guard 'guard'. Check if we have already seen a call to this 5816 function with the same guard. If so, delete it. If not, 5817 add it to the set of calls we do know about. */ 5818 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 5819 if (alreadyPresent) { 5820 sb_in->stmts[i] = IRStmt_NoOp(); 5821 if (0) VG_(printf)("XX\n"); 5822 } 5823 } 5824 VG_(deleteXA)( pairs ); 5825 return sb_in; 5826} 5827 5828 5829/*------------------------------------------------------------*/ 5830/*--- Origin tracking stuff ---*/ 5831/*------------------------------------------------------------*/ 5832 5833/* Almost identical to findShadowTmpV. */ 5834static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 5835{ 5836 TempMapEnt* ent; 5837 /* VG_(indexXA) range-checks 'orig', hence no need to check 5838 here. 
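      A usage sketch: B ('origin') shadows are always Ity_I32, with
      zero meaning 'no useful origin'.  The typical caller is schemeS;
      for example its Ist_WrTmp case does, in essence,

         assign( 'B', mce, findShadowTmpB(mce, tmp),
                           schemeE(mce, data) );

      and the first such call for a given original temp is what
      triggers the lazy allocation below.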
*/ 5839 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5840 tl_assert(ent->kind == Orig); 5841 if (ent->shadowB == IRTemp_INVALID) { 5842 IRTemp tmpB 5843 = newTemp( mce, Ity_I32, BSh ); 5844 /* newTemp may cause mce->tmpMap to resize, hence previous results 5845 from VG_(indexXA) are invalid. */ 5846 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5847 tl_assert(ent->kind == Orig); 5848 tl_assert(ent->shadowB == IRTemp_INVALID); 5849 ent->shadowB = tmpB; 5850 } 5851 return ent->shadowB; 5852} 5853 5854static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 5855{ 5856 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 5857} 5858 5859static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5860 IRAtom* baseaddr, Int offset ) 5861{ 5862 void* hFun; 5863 const HChar* hName; 5864 IRTemp bTmp; 5865 IRDirty* di; 5866 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5867 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5868 IRAtom* ea = baseaddr; 5869 if (offset != 0) { 5870 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5871 : mkU64( (Long)(Int)offset ); 5872 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5873 } 5874 bTmp = newTemp(mce, mce->hWordTy, BSh); 5875 5876 switch (szB) { 5877 case 1: hFun = (void*)&MC_(helperc_b_load1); 5878 hName = "MC_(helperc_b_load1)"; 5879 break; 5880 case 2: hFun = (void*)&MC_(helperc_b_load2); 5881 hName = "MC_(helperc_b_load2)"; 5882 break; 5883 case 4: hFun = (void*)&MC_(helperc_b_load4); 5884 hName = "MC_(helperc_b_load4)"; 5885 break; 5886 case 8: hFun = (void*)&MC_(helperc_b_load8); 5887 hName = "MC_(helperc_b_load8)"; 5888 break; 5889 case 16: hFun = (void*)&MC_(helperc_b_load16); 5890 hName = "MC_(helperc_b_load16)"; 5891 break; 5892 case 32: hFun = (void*)&MC_(helperc_b_load32); 5893 hName = "MC_(helperc_b_load32)"; 5894 break; 5895 default: 5896 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 5897 tl_assert(0); 5898 } 5899 di = unsafeIRDirty_1_N( 5900 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 5901 mkIRExprVec_1( ea ) 5902 ); 5903 /* no need to mess with any annotations. This call accesses 5904 neither guest state nor guest memory. */ 5905 stmt( 'B', mce, IRStmt_Dirty(di) ); 5906 if (mce->hWordTy == Ity_I64) { 5907 /* 64-bit host */ 5908 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 5909 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 5910 return mkexpr(bTmp32); 5911 } else { 5912 /* 32-bit host */ 5913 return mkexpr(bTmp); 5914 } 5915} 5916 5917static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr, 5918 Int offset, IRAtom* guard ) 5919{ 5920 if (guard) { 5921 IRAtom *cond, *iffalse, *iftrue; 5922 5923 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard)); 5924 iftrue = assignNew('B', mce, Ity_I32, 5925 gen_load_b(mce, szB, baseaddr, offset)); 5926 iffalse = mkU32(0); 5927 5928 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue)); 5929 } 5930 5931 return gen_load_b(mce, szB, baseaddr, offset); 5932} 5933 5934/* Generate a shadow store. guard :: Ity_I1 controls whether the 5935 store really happens; NULL means it unconditionally does. */ 5936static void gen_store_b ( MCEnv* mce, Int szB, 5937 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5938 IRAtom* guard ) 5939{ 5940 void* hFun; 5941 const HChar* hName; 5942 IRDirty* di; 5943 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5944 IROp opAdd = aTy == Ity_I32 ? 
Iop_Add32 : Iop_Add64; 5945 IRAtom* ea = baseaddr; 5946 if (guard) { 5947 tl_assert(isOriginalAtom(mce, guard)); 5948 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5949 } 5950 if (offset != 0) { 5951 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5952 : mkU64( (Long)(Int)offset ); 5953 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5954 } 5955 if (mce->hWordTy == Ity_I64) 5956 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); 5957 5958 switch (szB) { 5959 case 1: hFun = (void*)&MC_(helperc_b_store1); 5960 hName = "MC_(helperc_b_store1)"; 5961 break; 5962 case 2: hFun = (void*)&MC_(helperc_b_store2); 5963 hName = "MC_(helperc_b_store2)"; 5964 break; 5965 case 4: hFun = (void*)&MC_(helperc_b_store4); 5966 hName = "MC_(helperc_b_store4)"; 5967 break; 5968 case 8: hFun = (void*)&MC_(helperc_b_store8); 5969 hName = "MC_(helperc_b_store8)"; 5970 break; 5971 case 16: hFun = (void*)&MC_(helperc_b_store16); 5972 hName = "MC_(helperc_b_store16)"; 5973 break; 5974 case 32: hFun = (void*)&MC_(helperc_b_store32); 5975 hName = "MC_(helperc_b_store32)"; 5976 break; 5977 default: 5978 tl_assert(0); 5979 } 5980 di = unsafeIRDirty_0_N( 2/*regparms*/, 5981 hName, VG_(fnptr_to_fnentry)( hFun ), 5982 mkIRExprVec_2( ea, dataB ) 5983 ); 5984 /* no need to mess with any annotations. This call accesses 5985 neither guest state nor guest memory. */ 5986 if (guard) di->guard = guard; 5987 stmt( 'B', mce, IRStmt_Dirty(di) ); 5988} 5989 5990static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { 5991 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5992 if (eTy == Ity_I64) 5993 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); 5994 if (eTy == Ity_I32) 5995 return e; 5996 tl_assert(0); 5997} 5998 5999static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { 6000 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 6001 tl_assert(eTy == Ity_I32); 6002 if (dstTy == Ity_I64) 6003 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); 6004 tl_assert(0); 6005} 6006 6007 6008static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) 6009{ 6010 tl_assert(MC_(clo_mc_level) == 3); 6011 6012 switch (e->tag) { 6013 6014 case Iex_GetI: { 6015 IRRegArray* descr_b; 6016 IRAtom *t1, *t2, *t3, *t4; 6017 IRRegArray* descr = e->Iex.GetI.descr; 6018 IRType equivIntTy 6019 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 6020 /* If this array is unshadowable for whatever reason, use the 6021 usual approximation. */ 6022 if (equivIntTy == Ity_INVALID) 6023 return mkU32(0); 6024 tl_assert(sizeofIRType(equivIntTy) >= 4); 6025 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 6026 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 6027 equivIntTy, descr->nElems ); 6028 /* Do a shadow indexed get of the same size, giving t1. Take 6029 the bottom 32 bits of it, giving t2. Compute into t3 the 6030 origin for the index (almost certainly zero, but there's 6031 no harm in being completely general here, since iropt will 6032 remove any useless code), and fold it in, giving a final 6033 value t4. 
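         (A reminder of the layout convention relied on here and in the
         Iex_Get / Ist_Put / Ist_PutI cases: the origin (B) shadow of
         guest state at offset OFF lives at OFF + 2*total_sizeB, the
         V-bit shadow occupying the copy in between.  Purely as a
         worked example with made-up numbers: if the register array
         starts at guest offset 400 and mce->layout->total_sizeB is
         1000, then descr_b below describes the same array re-based at
         offset 2400.)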
*/ 6034 t1 = assignNew( 'B', mce, equivIntTy, 6035 IRExpr_GetI( descr_b, e->Iex.GetI.ix, 6036 e->Iex.GetI.bias )); 6037 t2 = narrowTo32( mce, t1 ); 6038 t3 = schemeE( mce, e->Iex.GetI.ix ); 6039 t4 = gen_maxU32( mce, t2, t3 ); 6040 return t4; 6041 } 6042 case Iex_CCall: { 6043 Int i; 6044 IRAtom* here; 6045 IRExpr** args = e->Iex.CCall.args; 6046 IRAtom* curr = mkU32(0); 6047 for (i = 0; args[i]; i++) { 6048 tl_assert(i < 32); 6049 tl_assert(isOriginalAtom(mce, args[i])); 6050 /* Only take notice of this arg if the callee's 6051 mc-exclusion mask does not say it is to be excluded. */ 6052 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) { 6053 /* the arg is to be excluded from definedness checking. 6054 Do nothing. */ 6055 if (0) VG_(printf)("excluding %s(%d)\n", 6056 e->Iex.CCall.cee->name, i); 6057 } else { 6058 /* calculate the arg's definedness, and pessimistically 6059 merge it in. */ 6060 here = schemeE( mce, args[i] ); 6061 curr = gen_maxU32( mce, curr, here ); 6062 } 6063 } 6064 return curr; 6065 } 6066 case Iex_Load: { 6067 Int dszB; 6068 dszB = sizeofIRType(e->Iex.Load.ty); 6069 /* assert that the B value for the address is already 6070 available (somewhere) */ 6071 tl_assert(isIRAtom(e->Iex.Load.addr)); 6072 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); 6073 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); 6074 } 6075 case Iex_Mux0X: { 6076 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond ); 6077 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 ); 6078 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX ); 6079 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 )); 6080 } 6081 case Iex_Qop: { 6082 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 ); 6083 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 ); 6084 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 ); 6085 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 ); 6086 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ), 6087 gen_maxU32( mce, b3, b4 ) ); 6088 } 6089 case Iex_Triop: { 6090 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 ); 6091 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 ); 6092 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 ); 6093 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) ); 6094 } 6095 case Iex_Binop: { 6096 switch (e->Iex.Binop.op) { 6097 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 6098 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 6099 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 6100 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 6101 /* Just say these all produce a defined result, 6102 regardless of their arguments. See 6103 COMMENT_ON_CasCmpEQ in this file. */ 6104 return mkU32(0); 6105 default: { 6106 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 ); 6107 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 ); 6108 return gen_maxU32( mce, b1, b2 ); 6109 } 6110 } 6111 tl_assert(0); 6112 /*NOTREACHED*/ 6113 } 6114 case Iex_Unop: { 6115 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg ); 6116 return b1; 6117 } 6118 case Iex_Const: 6119 return mkU32(0); 6120 case Iex_RdTmp: 6121 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp )); 6122 case Iex_Get: { 6123 Int b_offset = MC_(get_otrack_shadow_offset)( 6124 e->Iex.Get.offset, 6125 sizeofIRType(e->Iex.Get.ty) 6126 ); 6127 tl_assert(b_offset >= -1 6128 && b_offset <= mce->layout->total_sizeB -4); 6129 if (b_offset >= 0) { 6130 /* FIXME: this isn't an atom! 
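            The Get below is an IRExpr but not an IRAtom, even though
            schemeE nominally returns atoms.  One possible flat
            alternative -- only a sketch, not what is done here --
            would be to bind it to a temp first:

               return assignNew( 'B', mce, Ity_I32,
                                 IRExpr_Get( b_offset
                                             + 2*mce->layout->total_sizeB,
                                             Ity_I32 ) );

            since assignNew yields an Iex_RdTmp, which is an atom.  The
            same remark applies to the FIXME in schemeS's Ist_Put case.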
*/ 6131 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, 6132 Ity_I32 ); 6133 } 6134 return mkU32(0); 6135 } 6136 default: 6137 VG_(printf)("mc_translate.c: schemeE: unhandled: "); 6138 ppIRExpr(e); 6139 VG_(tool_panic)("memcheck:schemeE"); 6140 } 6141} 6142 6143 6144static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) 6145{ 6146 // This is a hacked version of do_shadow_Dirty 6147 Int i, k, n, toDo, gSz, gOff; 6148 IRAtom *here, *curr; 6149 IRTemp dst; 6150 6151 /* First check the guard. */ 6152 curr = schemeE( mce, d->guard ); 6153 6154 /* Now round up all inputs and maxU32 over them. */ 6155 6156 /* Inputs: unmasked args 6157 Note: arguments are evaluated REGARDLESS of the guard expression */ 6158 for (i = 0; d->args[i]; i++) { 6159 if (d->cee->mcx_mask & (1<<i)) { 6160 /* ignore this arg */ 6161 } else { 6162 here = schemeE( mce, d->args[i] ); 6163 curr = gen_maxU32( mce, curr, here ); 6164 } 6165 } 6166 6167 /* Inputs: guest state that we read. */ 6168 for (i = 0; i < d->nFxState; i++) { 6169 tl_assert(d->fxState[i].fx != Ifx_None); 6170 if (d->fxState[i].fx == Ifx_Write) 6171 continue; 6172 6173 /* Enumerate the described state segments */ 6174 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6175 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6176 gSz = d->fxState[i].size; 6177 6178 /* Ignore any sections marked as 'always defined'. */ 6179 if (isAlwaysDefd(mce, gOff, gSz)) { 6180 if (0) 6181 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 6182 gOff, gSz); 6183 continue; 6184 } 6185 6186 /* This state element is read or modified. So we need to 6187 consider it. If larger than 4 bytes, deal with it in 6188 4-byte chunks. */ 6189 while (True) { 6190 Int b_offset; 6191 tl_assert(gSz >= 0); 6192 if (gSz == 0) break; 6193 n = gSz <= 4 ? gSz : 4; 6194 /* update 'curr' with maxU32 of the state slice 6195 gOff .. gOff+n-1 */ 6196 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6197 if (b_offset != -1) { 6198 /* Observe the guard expression. If it is false use 0, i.e. 6199 nothing is known about the origin */ 6200 IRAtom *cond, *iffalse, *iftrue; 6201 6202 cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard)); 6203 iffalse = mkU32(0); 6204 iftrue = assignNew( 'B', mce, Ity_I32, 6205 IRExpr_Get(b_offset 6206 + 2*mce->layout->total_sizeB, 6207 Ity_I32)); 6208 here = assignNew( 'B', mce, Ity_I32, 6209 IRExpr_Mux0X(cond, iffalse, iftrue)); 6210 curr = gen_maxU32( mce, curr, here ); 6211 } 6212 gSz -= n; 6213 gOff += n; 6214 } 6215 } 6216 } 6217 6218 /* Inputs: memory */ 6219 6220 if (d->mFx != Ifx_None) { 6221 /* Because we may do multiple shadow loads/stores from the same 6222 base address, it's best to do a single test of its 6223 definedness right now. Post-instrumentation optimisation 6224 should remove all but this test. */ 6225 tl_assert(d->mAddr); 6226 here = schemeE( mce, d->mAddr ); 6227 curr = gen_maxU32( mce, curr, here ); 6228 } 6229 6230 /* Deal with memory inputs (reads or modifies) */ 6231 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 6232 toDo = d->mSize; 6233 /* chew off 32-bit chunks. We don't care about the endianness 6234 since it's all going to be condensed down to a single bit, 6235 but nevertheless choose an endianness which is hopefully 6236 native to the platform. 
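      A concrete illustration of the chunking that follows: for a dirty
      call which reads (or modifies) 7 bytes at d->mAddr, three guarded
      B-loads are issued and folded into 'curr' with gen_maxU32:

         gen_guarded_load_b( mce, 4, d->mAddr, 0, d->guard );   -- bytes 0..3
         gen_guarded_load_b( mce, 2, d->mAddr, 4, d->guard );   -- bytes 4..5
         gen_guarded_load_b( mce, 1, d->mAddr, 6, d->guard );   -- byte  6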
*/ 6237 while (toDo >= 4) { 6238 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo, 6239 d->guard ); 6240 curr = gen_maxU32( mce, curr, here ); 6241 toDo -= 4; 6242 } 6243 /* handle possible 16-bit excess */ 6244 while (toDo >= 2) { 6245 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo, 6246 d->guard ); 6247 curr = gen_maxU32( mce, curr, here ); 6248 toDo -= 2; 6249 } 6250 /* chew off the remaining 8-bit chunk, if any */ 6251 if (toDo == 1) { 6252 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo, 6253 d->guard ); 6254 curr = gen_maxU32( mce, curr, here ); 6255 toDo -= 1; 6256 } 6257 tl_assert(toDo == 0); 6258 } 6259 6260 /* Whew! So curr is a 32-bit B-value which should give an origin 6261 of some use if any of the inputs to the helper are undefined. 6262 Now we need to re-distribute the results to all destinations. */ 6263 6264 /* Outputs: the destination temporary, if there is one. */ 6265 if (d->tmp != IRTemp_INVALID) { 6266 dst = findShadowTmpB(mce, d->tmp); 6267 assign( 'V', mce, dst, curr ); 6268 } 6269 6270 /* Outputs: guest state that we write or modify. */ 6271 for (i = 0; i < d->nFxState; i++) { 6272 tl_assert(d->fxState[i].fx != Ifx_None); 6273 if (d->fxState[i].fx == Ifx_Read) 6274 continue; 6275 6276 /* Enumerate the described state segments */ 6277 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6278 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6279 gSz = d->fxState[i].size; 6280 6281 /* Ignore any sections marked as 'always defined'. */ 6282 if (isAlwaysDefd(mce, gOff, gSz)) 6283 continue; 6284 6285 /* This state element is written or modified. So we need to 6286 consider it. If larger than 4 bytes, deal with it in 6287 4-byte chunks. */ 6288 while (True) { 6289 Int b_offset; 6290 tl_assert(gSz >= 0); 6291 if (gSz == 0) break; 6292 n = gSz <= 4 ? gSz : 4; 6293 /* Write 'curr' to the state slice gOff .. gOff+n-1 */ 6294 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6295 if (b_offset != -1) { 6296 if (d->guard) { 6297 /* If the guard expression evaluates to false we simply Put 6298 the value that is already stored in the guest state slot */ 6299 IRAtom *cond, *iffalse; 6300 6301 cond = assignNew('B', mce, Ity_I8, 6302 unop(Iop_1Uto8, d->guard)); 6303 iffalse = assignNew('B', mce, Ity_I32, 6304 IRExpr_Get(b_offset + 6305 2*mce->layout->total_sizeB, 6306 Ity_I32)); 6307 curr = assignNew('V', mce, Ity_I32, 6308 IRExpr_Mux0X(cond, iffalse, curr)); 6309 } 6310 stmt( 'B', mce, IRStmt_Put(b_offset 6311 + 2*mce->layout->total_sizeB, 6312 curr )); 6313 } 6314 gSz -= n; 6315 gOff += n; 6316 } 6317 } 6318 } 6319 6320 /* Outputs: memory that we write or modify. Same comments about 6321 endianness as above apply. 
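      Note also that every chunk is written the same 32-bit value
      'curr', so all bytes touched by the helper inherit one merged
      origin.  Sketch, for a 6-byte guarded write:

         gen_store_b( mce, 4, d->mAddr, 0, curr, d->guard );   -- bytes 0..3
         gen_store_b( mce, 2, d->mAddr, 4, curr, d->guard );   -- bytes 4..5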
*/ 6322 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 6323 toDo = d->mSize; 6324 /* chew off 32-bit chunks */ 6325 while (toDo >= 4) { 6326 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr, 6327 d->guard ); 6328 toDo -= 4; 6329 } 6330 /* handle possible 16-bit excess */ 6331 while (toDo >= 2) { 6332 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr, 6333 d->guard ); 6334 toDo -= 2; 6335 } 6336 /* chew off the remaining 8-bit chunk, if any */ 6337 if (toDo == 1) { 6338 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr, 6339 d->guard ); 6340 toDo -= 1; 6341 } 6342 tl_assert(toDo == 0); 6343 } 6344} 6345 6346 6347static void do_origins_Store ( MCEnv* mce, 6348 IREndness stEnd, 6349 IRExpr* stAddr, 6350 IRExpr* stData ) 6351{ 6352 Int dszB; 6353 IRAtom* dataB; 6354 /* assert that the B value for the address is already available 6355 (somewhere), since the call to schemeE will want to see it. 6356 XXXX how does this actually ensure that?? */ 6357 tl_assert(isIRAtom(stAddr)); 6358 tl_assert(isIRAtom(stData)); 6359 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) ); 6360 dataB = schemeE( mce, stData ); 6361 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, 6362 NULL/*guard*/ ); 6363} 6364 6365 6366static void schemeS ( MCEnv* mce, IRStmt* st ) 6367{ 6368 tl_assert(MC_(clo_mc_level) == 3); 6369 6370 switch (st->tag) { 6371 6372 case Ist_AbiHint: 6373 /* The value-check instrumenter handles this - by arranging 6374 to pass the address of the next instruction to 6375 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to 6376 happen for origin tracking w.r.t. AbiHints. So there is 6377 nothing to do here. */ 6378 break; 6379 6380 case Ist_PutI: { 6381 IRPutI *puti = st->Ist.PutI.details; 6382 IRRegArray* descr_b; 6383 IRAtom *t1, *t2, *t3, *t4; 6384 IRRegArray* descr = puti->descr; 6385 IRType equivIntTy 6386 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 6387 /* If this array is unshadowable for whatever reason, 6388 generate no code. */ 6389 if (equivIntTy == Ity_INVALID) 6390 break; 6391 tl_assert(sizeofIRType(equivIntTy) >= 4); 6392 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 6393 descr_b 6394 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 6395 equivIntTy, descr->nElems ); 6396 /* Compute a value to Put - the conjoinment of the origin for 6397 the data to be Put-ted (obviously) and of the index value 6398 (not so obviously). */ 6399 t1 = schemeE( mce, puti->data ); 6400 t2 = schemeE( mce, puti->ix ); 6401 t3 = gen_maxU32( mce, t1, t2 ); 6402 t4 = zWidenFrom32( mce, equivIntTy, t3 ); 6403 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix, 6404 puti->bias, t4) )); 6405 break; 6406 } 6407 6408 case Ist_Dirty: 6409 do_origins_Dirty( mce, st->Ist.Dirty.details ); 6410 break; 6411 6412 case Ist_Store: 6413 do_origins_Store( mce, st->Ist.Store.end, 6414 st->Ist.Store.addr, 6415 st->Ist.Store.data ); 6416 break; 6417 6418 case Ist_LLSC: { 6419 /* In short: treat a load-linked like a normal load followed 6420 by an assignment of the loaded (shadow) data the result 6421 temporary. Treat a store-conditional like a normal store, 6422 and mark the result temporary as defined. 
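         Sketch of the outcome, with hypothetical temp names: for a
         load-linked

            t2 = LDle-Linked:I32(t1)

         the code below builds an equivalent plain load and hands it to
         schemeE; that produces a MC_(helperc_b_load4) call on the
         address t1 (fetching the stored origin of those bytes), and
         its result becomes t2's B shadow.  For the store-conditional
         form, the stored data goes through do_origins_Store and t2's
         B shadow simply gets 0x0:I32 (no origin), mirroring the
         always-defined V treatment in do_shadow_LLSC.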
*/ 6423 if (st->Ist.LLSC.storedata == NULL) { 6424 /* Load Linked */ 6425 IRType resTy 6426 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result); 6427 IRExpr* vanillaLoad 6428 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr); 6429 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 6430 || resTy == Ity_I16 || resTy == Ity_I8); 6431 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), 6432 schemeE(mce, vanillaLoad)); 6433 } else { 6434 /* Store conditional */ 6435 do_origins_Store( mce, st->Ist.LLSC.end, 6436 st->Ist.LLSC.addr, 6437 st->Ist.LLSC.storedata ); 6438 /* For the rationale behind this, see comments at the 6439 place where the V-shadow for .result is constructed, in 6440 do_shadow_LLSC. In short, we regard .result as 6441 always-defined. */ 6442 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), 6443 mkU32(0) ); 6444 } 6445 break; 6446 } 6447 6448 case Ist_Put: { 6449 Int b_offset 6450 = MC_(get_otrack_shadow_offset)( 6451 st->Ist.Put.offset, 6452 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data)) 6453 ); 6454 if (b_offset >= 0) { 6455 /* FIXME: this isn't an atom! */ 6456 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB, 6457 schemeE( mce, st->Ist.Put.data )) ); 6458 } 6459 break; 6460 } 6461 6462 case Ist_WrTmp: 6463 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp), 6464 schemeE(mce, st->Ist.WrTmp.data) ); 6465 break; 6466 6467 case Ist_MBE: 6468 case Ist_NoOp: 6469 case Ist_Exit: 6470 case Ist_IMark: 6471 break; 6472 6473 default: 6474 VG_(printf)("mc_translate.c: schemeS: unhandled: "); 6475 ppIRStmt(st); 6476 VG_(tool_panic)("memcheck:schemeS"); 6477 } 6478} 6479 6480 6481/*--------------------------------------------------------------------*/ 6482/*--- end mc_translate.c ---*/ 6483/*--------------------------------------------------------------------*/ 6484