mc_translate.c revision 7eb17a827d03c3f7c83f75b6634ea15cbdfede5a

/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.        ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.
*/

/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are and
   should be checked, why, and what function is responsible.


   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
   tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_temps-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256.
*/

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/I128/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)(" %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops                     ---*/
/*------------------------------------------------------------*/

static IRExpr *i128_const_zero(void)
{
   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
   return binop(Iop_64HLto128, z64, z64);
}

/* There are no I128-bit loads and/or stores [as generated by any
   current front ends].  So we do not need to worry about that in
   expr2vbits_Load */


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
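/* Illustrative example (added commentary, not from the original
   source): with 4-bit values, data = 0b0100 and vbits = 0b0011 gives
   ImproveAND = data | vbits = 0b0111.  Only bit 3 of the result is 0,
   i.e. only where data supplies a defined 0 can the AND result be
   forced to a known 0 regardless of the other operand. */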
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
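/* Illustrative example (added commentary, not from the original
   source): with 4-bit values, data = 0b1000 and vbits = 0b0011 gives
   ImproveOR = ~data | vbits = 0b0111.  Only bit 3 of the result is 0:
   a defined 1 in data forces the corresponding OR result bit to a
   known 1, whatever the other operand holds. */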
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY. If any of the VBITS
   is undefined (value == 1) the resulting expression has all bits set to
   1. Otherwise, all bits are 0. */
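/* For instance (illustrative only): PCasting an I8 shadow value of
   0x04 -- bit 2 undefined -- to Ity_I32 yields 0xFFFFFFFF, whereas
   PCasting a fully defined 0x00 yields 0x00000000. */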
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1 = assignNew('V', mce, Ity_I1,
                               unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
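/* Worked example (added commentary, not part of the original
   analysis): at width 2, let xx = 0b10 with vxx = 0b01 (bit 0
   undefined) and yy = 0b00 with vyy = 0b00 (fully defined).  Then

      vec = 0b01 | 0b00 | Not(0b10 Xor 0b00) = 0b01,

   which is not 1...1, so the improvement term is 0...0 and the final
   result is 'defined' -- correct, since bit 1 of xx and yy is defined
   in both and differs, which settles the comparison. */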
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}


/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
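/* Illustrative example (added commentary, not from the original
   source): for CmpORD32S(x,0) with x# = 0x7FFFFFFF (only the sign bit
   of x defined), the standard interpretation would leave all of bits
   3..1 undefined.  The special case below instead computes

      (PCast(x#) & (3<<1)) | ((x# >>u 31) << 3)  =  0x6 | 0x0  =  0x6,

   so the LT bit (bit 3) is reported as defined, since it depends only
   on the (defined) sign bit of x. */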
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;

   /* If the complaint is to be issued under a guard condition, AND that
      guard condition. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));

      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here, that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}


/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

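/* A note on the scheme used below (added commentary, not from the
   original author): a_min/b_min clear the undefined bits of each
   argument and a_max/b_max set them, so every value the argument
   could actually take lies between those two extremes.  XORing the
   sums (or differences) of the extremes flags the result bits that
   the undefined inputs could influence via carries/borrows, and the
   (qaa | qbb) term adds back the directly undefined bit positions. */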
---*/ 1707/*------------------------------------------------------------*/ 1708 1709static 1710IRAtom* expensiveAddSub ( MCEnv* mce, 1711 Bool add, 1712 IRType ty, 1713 IRAtom* qaa, IRAtom* qbb, 1714 IRAtom* aa, IRAtom* bb ) 1715{ 1716 IRAtom *a_min, *b_min, *a_max, *b_max; 1717 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB; 1718 1719 tl_assert(isShadowAtom(mce,qaa)); 1720 tl_assert(isShadowAtom(mce,qbb)); 1721 tl_assert(isOriginalAtom(mce,aa)); 1722 tl_assert(isOriginalAtom(mce,bb)); 1723 tl_assert(sameKindedAtoms(qaa,aa)); 1724 tl_assert(sameKindedAtoms(qbb,bb)); 1725 1726 switch (ty) { 1727 case Ity_I32: 1728 opAND = Iop_And32; 1729 opOR = Iop_Or32; 1730 opXOR = Iop_Xor32; 1731 opNOT = Iop_Not32; 1732 opADD = Iop_Add32; 1733 opSUB = Iop_Sub32; 1734 break; 1735 case Ity_I64: 1736 opAND = Iop_And64; 1737 opOR = Iop_Or64; 1738 opXOR = Iop_Xor64; 1739 opNOT = Iop_Not64; 1740 opADD = Iop_Add64; 1741 opSUB = Iop_Sub64; 1742 break; 1743 default: 1744 VG_(tool_panic)("expensiveAddSub"); 1745 } 1746 1747 // a_min = aa & ~qaa 1748 a_min = assignNew('V', mce,ty, 1749 binop(opAND, aa, 1750 assignNew('V', mce,ty, unop(opNOT, qaa)))); 1751 1752 // b_min = bb & ~qbb 1753 b_min = assignNew('V', mce,ty, 1754 binop(opAND, bb, 1755 assignNew('V', mce,ty, unop(opNOT, qbb)))); 1756 1757 // a_max = aa | qaa 1758 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa)); 1759 1760 // b_max = bb | qbb 1761 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb)); 1762 1763 if (add) { 1764 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max)) 1765 return 1766 assignNew('V', mce,ty, 1767 binop( opOR, 1768 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1769 assignNew('V', mce,ty, 1770 binop( opXOR, 1771 assignNew('V', mce,ty, binop(opADD, a_min, b_min)), 1772 assignNew('V', mce,ty, binop(opADD, a_max, b_max)) 1773 ) 1774 ) 1775 ) 1776 ); 1777 } else { 1778 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min)) 1779 return 1780 assignNew('V', mce,ty, 1781 binop( opOR, 1782 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1783 assignNew('V', mce,ty, 1784 binop( opXOR, 1785 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)), 1786 assignNew('V', mce,ty, binop(opSUB, a_max, b_min)) 1787 ) 1788 ) 1789 ) 1790 ); 1791 } 1792 1793} 1794 1795 1796/*------------------------------------------------------------*/ 1797/*--- Scalar shifts. ---*/ 1798/*------------------------------------------------------------*/ 1799 1800/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic 1801 idea is to shift the definedness bits by the original shift amount. 1802 This introduces 0s ("defined") in new positions for left shifts and 1803 unsigned right shifts, and copies the top definedness bit for 1804 signed right shifts. So, conveniently, applying the original shift 1805 operator to the definedness bits for the left arg is exactly the 1806 right thing to do: 1807 1808 (qaa << bb) 1809 1810 However if the shift amount is undefined then the whole result 1811 is undefined. Hence need: 1812 1813 (qaa << bb) `UifU` PCast(qbb) 1814 1815 If the shift amount bb is a literal than qbb will say 'all defined' 1816 and the UifU and PCast will get folded out by post-instrumentation 1817 optimisation. 
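   A concrete sketch (the numbers are picked purely for illustration):
   with 8-bit values, qaa = 0x06 (bits 1 and 2 of aa undefined) and a
   fully defined shift amount bb = 3:

      qaa << bb    = 0x30   -- the undefinedness moves up to bits 4 and 5;
                               the freshly shifted-in bits 0..2 are zero,
                               ie defined
      PCast(qbb)   = 0x00   -- bb is fully defined
      result       = 0x30 `UifU` 0x00 = 0x30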
1818*/ 1819static IRAtom* scalarShift ( MCEnv* mce, 1820 IRType ty, 1821 IROp original_op, 1822 IRAtom* qaa, IRAtom* qbb, 1823 IRAtom* aa, IRAtom* bb ) 1824{ 1825 tl_assert(isShadowAtom(mce,qaa)); 1826 tl_assert(isShadowAtom(mce,qbb)); 1827 tl_assert(isOriginalAtom(mce,aa)); 1828 tl_assert(isOriginalAtom(mce,bb)); 1829 tl_assert(sameKindedAtoms(qaa,aa)); 1830 tl_assert(sameKindedAtoms(qbb,bb)); 1831 return 1832 assignNew( 1833 'V', mce, ty, 1834 mkUifU( mce, ty, 1835 assignNew('V', mce, ty, binop(original_op, qaa, bb)), 1836 mkPCastTo(mce, ty, qbb) 1837 ) 1838 ); 1839} 1840 1841 1842/*------------------------------------------------------------*/ 1843/*--- Helpers for dealing with vector primops. ---*/ 1844/*------------------------------------------------------------*/ 1845 1846/* Vector pessimisation -- pessimise within each lane individually. */ 1847 1848static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at ) 1849{ 1850 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at)); 1851} 1852 1853static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at ) 1854{ 1855 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at)); 1856} 1857 1858static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at ) 1859{ 1860 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at)); 1861} 1862 1863static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) 1864{ 1865 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); 1866} 1867 1868static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at ) 1869{ 1870 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at)); 1871} 1872 1873static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at ) 1874{ 1875 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at)); 1876} 1877 1878static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at ) 1879{ 1880 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at)); 1881} 1882 1883static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at ) 1884{ 1885 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at)); 1886} 1887 1888static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at ) 1889{ 1890 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at)); 1891} 1892 1893static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at ) 1894{ 1895 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at)); 1896} 1897 1898static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at ) 1899{ 1900 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at)); 1901} 1902 1903 1904/* Here's a simple scheme capable of handling ops derived from SSE1 1905 code and while only generating ops that can be efficiently 1906 implemented in SSE1. */ 1907 1908/* All-lanes versions are straightforward: 1909 1910 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#)) 1911 1912 unary32Fx4(x,y) ==> PCast32x4(x#) 1913 1914 Lowest-lane-only versions are more complex: 1915 1916 binary32F0x4(x,y) ==> SetV128lo32( 1917 x#, 1918 PCast32(V128to32(UifUV128(x#,y#))) 1919 ) 1920 1921 This is perhaps not so obvious. In particular, it's faster to 1922 do a V128-bit UifU and then take the bottom 32 bits than the more 1923 obvious scheme of taking the bottom 32 bits of each operand 1924 and doing a 32-bit UifU. Basically since UifU is fast and 1925 chopping lanes off vector values is slow. 
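   For comparison, the scheme being rejected here would look like
   (sketch only; this is never generated):

      binary32F0x4(x,y) ==> SetV128lo32(
                               x#,
                               PCast32(UifU32(V128to32(x#), V128to32(y#)))
                            )

   which needs two V128to32 extractions rather than one.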
1926 1927 Finally: 1928 1929 unary32F0x4(x) ==> SetV128lo32( 1930 x#, 1931 PCast32(V128to32(x#)) 1932 ) 1933 1934 Where: 1935 1936 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 1937 PCast32x4(v#) = CmpNEZ32x4(v#) 1938*/ 1939 1940static 1941IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1942{ 1943 IRAtom* at; 1944 tl_assert(isShadowAtom(mce, vatomX)); 1945 tl_assert(isShadowAtom(mce, vatomY)); 1946 at = mkUifUV128(mce, vatomX, vatomY); 1947 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 1948 return at; 1949} 1950 1951static 1952IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 1953{ 1954 IRAtom* at; 1955 tl_assert(isShadowAtom(mce, vatomX)); 1956 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 1957 return at; 1958} 1959 1960static 1961IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1962{ 1963 IRAtom* at; 1964 tl_assert(isShadowAtom(mce, vatomX)); 1965 tl_assert(isShadowAtom(mce, vatomY)); 1966 at = mkUifUV128(mce, vatomX, vatomY); 1967 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 1968 at = mkPCastTo(mce, Ity_I32, at); 1969 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1970 return at; 1971} 1972 1973static 1974IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 1975{ 1976 IRAtom* at; 1977 tl_assert(isShadowAtom(mce, vatomX)); 1978 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 1979 at = mkPCastTo(mce, Ity_I32, at); 1980 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1981 return at; 1982} 1983 1984/* --- ... and ... 64Fx2 versions of the same ... --- */ 1985 1986static 1987IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1988{ 1989 IRAtom* at; 1990 tl_assert(isShadowAtom(mce, vatomX)); 1991 tl_assert(isShadowAtom(mce, vatomY)); 1992 at = mkUifUV128(mce, vatomX, vatomY); 1993 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 1994 return at; 1995} 1996 1997static 1998IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 1999{ 2000 IRAtom* at; 2001 tl_assert(isShadowAtom(mce, vatomX)); 2002 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 2003 return at; 2004} 2005 2006static 2007IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2008{ 2009 IRAtom* at; 2010 tl_assert(isShadowAtom(mce, vatomX)); 2011 tl_assert(isShadowAtom(mce, vatomY)); 2012 at = mkUifUV128(mce, vatomX, vatomY); 2013 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 2014 at = mkPCastTo(mce, Ity_I64, at); 2015 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2016 return at; 2017} 2018 2019static 2020IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 2021{ 2022 IRAtom* at; 2023 tl_assert(isShadowAtom(mce, vatomX)); 2024 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 2025 at = mkPCastTo(mce, Ity_I64, at); 2026 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2027 return at; 2028} 2029 2030/* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 2031 2032static 2033IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2034{ 2035 IRAtom* at; 2036 tl_assert(isShadowAtom(mce, vatomX)); 2037 tl_assert(isShadowAtom(mce, vatomY)); 2038 at = mkUifU64(mce, vatomX, vatomY); 2039 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 2040 return at; 2041} 2042 2043static 2044IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2045{ 2046 IRAtom* at; 2047 tl_assert(isShadowAtom(mce, vatomX)); 2048 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 2049 return at; 2050} 2051 2052/* --- ... and ... 64Fx4 versions of the same ... --- */ 2053 2054static 2055IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2056{ 2057 IRAtom* at; 2058 tl_assert(isShadowAtom(mce, vatomX)); 2059 tl_assert(isShadowAtom(mce, vatomY)); 2060 at = mkUifUV256(mce, vatomX, vatomY); 2061 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at)); 2062 return at; 2063} 2064 2065static 2066IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2067{ 2068 IRAtom* at; 2069 tl_assert(isShadowAtom(mce, vatomX)); 2070 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX)); 2071 return at; 2072} 2073 2074/* --- ... and ... 32Fx8 versions of the same ... --- */ 2075 2076static 2077IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2078{ 2079 IRAtom* at; 2080 tl_assert(isShadowAtom(mce, vatomX)); 2081 tl_assert(isShadowAtom(mce, vatomY)); 2082 at = mkUifUV256(mce, vatomX, vatomY); 2083 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at)); 2084 return at; 2085} 2086 2087static 2088IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) 2089{ 2090 IRAtom* at; 2091 tl_assert(isShadowAtom(mce, vatomX)); 2092 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX)); 2093 return at; 2094} 2095 2096/* --- --- Vector saturated narrowing --- --- */ 2097 2098/* We used to do something very clever here, but on closer inspection 2099 (2011-Jun-15), and in particular bug #279698, it turns out to be 2100 wrong. Part of the problem came from the fact that for a long 2101 time, the IR primops to do with saturated narrowing were 2102 underspecified and managed to confuse multiple cases which needed 2103 to be separate: the op names had a signedness qualifier, but in 2104 fact the source and destination signednesses needed to be specified 2105 independently, so the op names really need two independent 2106 signedness specifiers. 2107 2108 As of 2011-Jun-15 (ish) the underspecification was sorted out 2109 properly. The incorrect instrumentation remained, though. That 2110 has now (2011-Oct-22) been fixed. 2111 2112 What we now do is simple: 2113 2114 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 2115 number of lanes, X is the source lane width and signedness, and Y 2116 is the destination lane width and signedness. In all cases the 2117 destination lane width is half the source lane width, so the names 2118 have a bit of redundancy, but are at least easy to read. 2119 2120 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 2121 to unsigned 16s. 2122 2123 Let Vanilla(OP) be a function that takes OP, one of these 2124 saturating narrowing ops, and produces the same "shaped" narrowing 2125 op which is not saturating, but merely dumps the most significant 2126 bits. "same shape" means that the lane numbers and widths are the 2127 same as with OP. 
2128 2129 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 2130 = Iop_NarrowBin32to16x8, 2131 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 2132 dumping the top half of each lane. 2133 2134 So, with that in place, the scheme is simple, and it is simple to 2135 pessimise each lane individually and then apply Vanilla(OP) so as 2136 to get the result in the right "shape". If the original OP is 2137 QNarrowBinXtoYxZ then we produce 2138 2139 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) ) 2140 2141 or for the case when OP is unary (Iop_QNarrowUn*) 2142 2143 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) ) 2144*/ 2145static 2146IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp ) 2147{ 2148 switch (qnarrowOp) { 2149 /* Binary: (128, 128) -> 128 */ 2150 case Iop_QNarrowBin16Sto8Ux16: 2151 case Iop_QNarrowBin16Sto8Sx16: 2152 case Iop_QNarrowBin16Uto8Ux16: 2153 return Iop_NarrowBin16to8x16; 2154 case Iop_QNarrowBin32Sto16Ux8: 2155 case Iop_QNarrowBin32Sto16Sx8: 2156 case Iop_QNarrowBin32Uto16Ux8: 2157 return Iop_NarrowBin32to16x8; 2158 /* Binary: (64, 64) -> 64 */ 2159 case Iop_QNarrowBin32Sto16Sx4: 2160 return Iop_NarrowBin32to16x4; 2161 case Iop_QNarrowBin16Sto8Ux8: 2162 case Iop_QNarrowBin16Sto8Sx8: 2163 return Iop_NarrowBin16to8x8; 2164 /* Unary: 128 -> 64 */ 2165 case Iop_QNarrowUn64Uto32Ux2: 2166 case Iop_QNarrowUn64Sto32Sx2: 2167 case Iop_QNarrowUn64Sto32Ux2: 2168 return Iop_NarrowUn64to32x2; 2169 case Iop_QNarrowUn32Uto16Ux4: 2170 case Iop_QNarrowUn32Sto16Sx4: 2171 case Iop_QNarrowUn32Sto16Ux4: 2172 return Iop_NarrowUn32to16x4; 2173 case Iop_QNarrowUn16Uto8Ux8: 2174 case Iop_QNarrowUn16Sto8Sx8: 2175 case Iop_QNarrowUn16Sto8Ux8: 2176 return Iop_NarrowUn16to8x8; 2177 default: 2178 ppIROp(qnarrowOp); 2179 VG_(tool_panic)("vanillaNarrowOpOfShape"); 2180 } 2181} 2182 2183static 2184IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op, 2185 IRAtom* vatom1, IRAtom* vatom2) 2186{ 2187 IRAtom *at1, *at2, *at3; 2188 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2189 switch (narrow_op) { 2190 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break; 2191 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break; 2192 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break; 2193 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break; 2194 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break; 2195 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break; 2196 default: VG_(tool_panic)("vectorNarrowBinV128"); 2197 } 2198 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2199 tl_assert(isShadowAtom(mce,vatom1)); 2200 tl_assert(isShadowAtom(mce,vatom2)); 2201 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2202 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 2203 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2)); 2204 return at3; 2205} 2206 2207static 2208IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op, 2209 IRAtom* vatom1, IRAtom* vatom2) 2210{ 2211 IRAtom *at1, *at2, *at3; 2212 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2213 switch (narrow_op) { 2214 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break; 2215 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break; 2216 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break; 2217 default: VG_(tool_panic)("vectorNarrowBin64"); 2218 } 2219 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2220 tl_assert(isShadowAtom(mce,vatom1)); 2221 tl_assert(isShadowAtom(mce,vatom2)); 2222 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 2223 at2 = assignNew('V', mce, 
Ity_I64, pcast(mce, vatom2)); 2224 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2)); 2225 return at3; 2226} 2227 2228static 2229IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op, 2230 IRAtom* vatom1) 2231{ 2232 IRAtom *at1, *at2; 2233 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2234 tl_assert(isShadowAtom(mce,vatom1)); 2235 /* For vanilla narrowing (non-saturating), we can just apply 2236 the op directly to the V bits. */ 2237 switch (narrow_op) { 2238 case Iop_NarrowUn16to8x8: 2239 case Iop_NarrowUn32to16x4: 2240 case Iop_NarrowUn64to32x2: 2241 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2242 return at1; 2243 default: 2244 break; /* Do Plan B */ 2245 } 2246 /* Plan B: for ops that involve a saturation operation on the args, 2247 we must PCast before the vanilla narrow. */ 2248 switch (narrow_op) { 2249 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2250 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2251 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2252 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2253 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2254 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2255 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2256 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2257 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2258 default: VG_(tool_panic)("vectorNarrowUnV128"); 2259 } 2260 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2261 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2262 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2263 return at2; 2264} 2265 2266static 2267IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2268 IRAtom* vatom1) 2269{ 2270 IRAtom *at1, *at2; 2271 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2272 switch (longen_op) { 2273 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2274 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2275 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2276 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2277 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2278 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2279 default: VG_(tool_panic)("vectorWidenI64"); 2280 } 2281 tl_assert(isShadowAtom(mce,vatom1)); 2282 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2283 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2284 return at2; 2285} 2286 2287 2288/* --- --- Vector integer arithmetic --- --- */ 2289 2290/* Simple ... UifU the args and per-lane pessimise the results. 
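   In the same shorthand as above, eg (sketch):

      binary16Ix8(q1,q2) ==> PCast16x8( UifUV128(q1,q2) )
      binary8Ix8(q1,q2)  ==> PCast8x8 ( UifU64(q1,q2) )

   and likewise for the other lane sizes and vector widths below.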
*/ 2291 2292/* --- V128-bit versions --- */ 2293 2294static 2295IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2296{ 2297 IRAtom* at; 2298 at = mkUifUV128(mce, vatom1, vatom2); 2299 at = mkPCast8x16(mce, at); 2300 return at; 2301} 2302 2303static 2304IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2305{ 2306 IRAtom* at; 2307 at = mkUifUV128(mce, vatom1, vatom2); 2308 at = mkPCast16x8(mce, at); 2309 return at; 2310} 2311 2312static 2313IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2314{ 2315 IRAtom* at; 2316 at = mkUifUV128(mce, vatom1, vatom2); 2317 at = mkPCast32x4(mce, at); 2318 return at; 2319} 2320 2321static 2322IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2323{ 2324 IRAtom* at; 2325 at = mkUifUV128(mce, vatom1, vatom2); 2326 at = mkPCast64x2(mce, at); 2327 return at; 2328} 2329 2330/* --- 64-bit versions --- */ 2331 2332static 2333IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2334{ 2335 IRAtom* at; 2336 at = mkUifU64(mce, vatom1, vatom2); 2337 at = mkPCast8x8(mce, at); 2338 return at; 2339} 2340 2341static 2342IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2343{ 2344 IRAtom* at; 2345 at = mkUifU64(mce, vatom1, vatom2); 2346 at = mkPCast16x4(mce, at); 2347 return at; 2348} 2349 2350static 2351IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2352{ 2353 IRAtom* at; 2354 at = mkUifU64(mce, vatom1, vatom2); 2355 at = mkPCast32x2(mce, at); 2356 return at; 2357} 2358 2359static 2360IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2361{ 2362 IRAtom* at; 2363 at = mkUifU64(mce, vatom1, vatom2); 2364 at = mkPCastTo(mce, Ity_I64, at); 2365 return at; 2366} 2367 2368/* --- 32-bit versions --- */ 2369 2370static 2371IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2372{ 2373 IRAtom* at; 2374 at = mkUifU32(mce, vatom1, vatom2); 2375 at = mkPCast8x4(mce, at); 2376 return at; 2377} 2378 2379static 2380IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2381{ 2382 IRAtom* at; 2383 at = mkUifU32(mce, vatom1, vatom2); 2384 at = mkPCast16x2(mce, at); 2385 return at; 2386} 2387 2388 2389/*------------------------------------------------------------*/ 2390/*--- Generate shadow values from all kinds of IRExprs. 
---*/ 2391/*------------------------------------------------------------*/ 2392 2393static 2394IRAtom* expr2vbits_Qop ( MCEnv* mce, 2395 IROp op, 2396 IRAtom* atom1, IRAtom* atom2, 2397 IRAtom* atom3, IRAtom* atom4 ) 2398{ 2399 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2400 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2401 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2402 IRAtom* vatom4 = expr2vbits( mce, atom4 ); 2403 2404 tl_assert(isOriginalAtom(mce,atom1)); 2405 tl_assert(isOriginalAtom(mce,atom2)); 2406 tl_assert(isOriginalAtom(mce,atom3)); 2407 tl_assert(isOriginalAtom(mce,atom4)); 2408 tl_assert(isShadowAtom(mce,vatom1)); 2409 tl_assert(isShadowAtom(mce,vatom2)); 2410 tl_assert(isShadowAtom(mce,vatom3)); 2411 tl_assert(isShadowAtom(mce,vatom4)); 2412 tl_assert(sameKindedAtoms(atom1,vatom1)); 2413 tl_assert(sameKindedAtoms(atom2,vatom2)); 2414 tl_assert(sameKindedAtoms(atom3,vatom3)); 2415 tl_assert(sameKindedAtoms(atom4,vatom4)); 2416 switch (op) { 2417 case Iop_MAddF64: 2418 case Iop_MAddF64r32: 2419 case Iop_MSubF64: 2420 case Iop_MSubF64r32: 2421 /* I32(rm) x F64 x F64 x F64 -> F64 */ 2422 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4); 2423 2424 case Iop_MAddF32: 2425 case Iop_MSubF32: 2426 /* I32(rm) x F32 x F32 x F32 -> F32 */ 2427 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4); 2428 2429 /* V256-bit data-steering */ 2430 case Iop_64x4toV256: 2431 return assignNew('V', mce, Ity_V256, 2432 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4)); 2433 2434 default: 2435 ppIROp(op); 2436 VG_(tool_panic)("memcheck:expr2vbits_Qop"); 2437 } 2438} 2439 2440 2441static 2442IRAtom* expr2vbits_Triop ( MCEnv* mce, 2443 IROp op, 2444 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) 2445{ 2446 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2447 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2448 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2449 2450 tl_assert(isOriginalAtom(mce,atom1)); 2451 tl_assert(isOriginalAtom(mce,atom2)); 2452 tl_assert(isOriginalAtom(mce,atom3)); 2453 tl_assert(isShadowAtom(mce,vatom1)); 2454 tl_assert(isShadowAtom(mce,vatom2)); 2455 tl_assert(isShadowAtom(mce,vatom3)); 2456 tl_assert(sameKindedAtoms(atom1,vatom1)); 2457 tl_assert(sameKindedAtoms(atom2,vatom2)); 2458 tl_assert(sameKindedAtoms(atom3,vatom3)); 2459 switch (op) { 2460 case Iop_AddF128: 2461 case Iop_AddD128: 2462 case Iop_SubF128: 2463 case Iop_SubD128: 2464 case Iop_MulF128: 2465 case Iop_MulD128: 2466 case Iop_DivF128: 2467 case Iop_DivD128: 2468 case Iop_QuantizeD128: 2469 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */ 2470 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2471 case Iop_AddF64: 2472 case Iop_AddD64: 2473 case Iop_AddF64r32: 2474 case Iop_SubF64: 2475 case Iop_SubD64: 2476 case Iop_SubF64r32: 2477 case Iop_MulF64: 2478 case Iop_MulD64: 2479 case Iop_MulF64r32: 2480 case Iop_DivF64: 2481 case Iop_DivD64: 2482 case Iop_DivF64r32: 2483 case Iop_ScaleF64: 2484 case Iop_Yl2xF64: 2485 case Iop_Yl2xp1F64: 2486 case Iop_AtanF64: 2487 case Iop_PRemF64: 2488 case Iop_PRem1F64: 2489 case Iop_QuantizeD64: 2490 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */ 2491 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2492 case Iop_PRemC3210F64: 2493 case Iop_PRem1C3210F64: 2494 /* I32(rm) x F64 x F64 -> I32 */ 2495 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2496 case Iop_AddF32: 2497 case Iop_SubF32: 2498 case Iop_MulF32: 2499 case Iop_DivF32: 2500 /* I32(rm) x F32 x F32 -> I32 */ 2501 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2502 case Iop_SignificanceRoundD64: 
2503 /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */ 2504 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2505 case Iop_SignificanceRoundD128: 2506 /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */ 2507 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2508 case Iop_ExtractV128: 2509 complainIfUndefined(mce, atom3, NULL); 2510 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2511 case Iop_Extract64: 2512 complainIfUndefined(mce, atom3, NULL); 2513 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2514 case Iop_SetElem8x8: 2515 case Iop_SetElem16x4: 2516 case Iop_SetElem32x2: 2517 complainIfUndefined(mce, atom2, NULL); 2518 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3)); 2519 default: 2520 ppIROp(op); 2521 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2522 } 2523} 2524 2525 2526static 2527IRAtom* expr2vbits_Binop ( MCEnv* mce, 2528 IROp op, 2529 IRAtom* atom1, IRAtom* atom2 ) 2530{ 2531 IRType and_or_ty; 2532 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2533 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2534 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2535 2536 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2537 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2538 2539 tl_assert(isOriginalAtom(mce,atom1)); 2540 tl_assert(isOriginalAtom(mce,atom2)); 2541 tl_assert(isShadowAtom(mce,vatom1)); 2542 tl_assert(isShadowAtom(mce,vatom2)); 2543 tl_assert(sameKindedAtoms(atom1,vatom1)); 2544 tl_assert(sameKindedAtoms(atom2,vatom2)); 2545 switch (op) { 2546 2547 /* 32-bit SIMD */ 2548 2549 case Iop_Add16x2: 2550 case Iop_HAdd16Ux2: 2551 case Iop_HAdd16Sx2: 2552 case Iop_Sub16x2: 2553 case Iop_HSub16Ux2: 2554 case Iop_HSub16Sx2: 2555 case Iop_QAdd16Sx2: 2556 case Iop_QSub16Sx2: 2557 return binary16Ix2(mce, vatom1, vatom2); 2558 2559 case Iop_Add8x4: 2560 case Iop_HAdd8Ux4: 2561 case Iop_HAdd8Sx4: 2562 case Iop_Sub8x4: 2563 case Iop_HSub8Ux4: 2564 case Iop_HSub8Sx4: 2565 case Iop_QSub8Ux4: 2566 case Iop_QAdd8Ux4: 2567 case Iop_QSub8Sx4: 2568 case Iop_QAdd8Sx4: 2569 return binary8Ix4(mce, vatom1, vatom2); 2570 2571 /* 64-bit SIMD */ 2572 2573 case Iop_ShrN8x8: 2574 case Iop_ShrN16x4: 2575 case Iop_ShrN32x2: 2576 case Iop_SarN8x8: 2577 case Iop_SarN16x4: 2578 case Iop_SarN32x2: 2579 case Iop_ShlN16x4: 2580 case Iop_ShlN32x2: 2581 case Iop_ShlN8x8: 2582 /* Same scheme as with all other shifts. 
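   That is (sketch): complain if the shift amount atom2 is not fully
   defined, and then just shift the V bits of the first arg by the
   original amount, eg ShrN16x4(q1, atom2).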
*/ 2583 complainIfUndefined(mce, atom2, NULL); 2584 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2585 2586 case Iop_QNarrowBin32Sto16Sx4: 2587 case Iop_QNarrowBin16Sto8Sx8: 2588 case Iop_QNarrowBin16Sto8Ux8: 2589 return vectorNarrowBin64(mce, op, vatom1, vatom2); 2590 2591 case Iop_Min8Ux8: 2592 case Iop_Min8Sx8: 2593 case Iop_Max8Ux8: 2594 case Iop_Max8Sx8: 2595 case Iop_Avg8Ux8: 2596 case Iop_QSub8Sx8: 2597 case Iop_QSub8Ux8: 2598 case Iop_Sub8x8: 2599 case Iop_CmpGT8Sx8: 2600 case Iop_CmpGT8Ux8: 2601 case Iop_CmpEQ8x8: 2602 case Iop_QAdd8Sx8: 2603 case Iop_QAdd8Ux8: 2604 case Iop_QSal8x8: 2605 case Iop_QShl8x8: 2606 case Iop_Add8x8: 2607 case Iop_Mul8x8: 2608 case Iop_PolynomialMul8x8: 2609 return binary8Ix8(mce, vatom1, vatom2); 2610 2611 case Iop_Min16Sx4: 2612 case Iop_Min16Ux4: 2613 case Iop_Max16Sx4: 2614 case Iop_Max16Ux4: 2615 case Iop_Avg16Ux4: 2616 case Iop_QSub16Ux4: 2617 case Iop_QSub16Sx4: 2618 case Iop_Sub16x4: 2619 case Iop_Mul16x4: 2620 case Iop_MulHi16Sx4: 2621 case Iop_MulHi16Ux4: 2622 case Iop_CmpGT16Sx4: 2623 case Iop_CmpGT16Ux4: 2624 case Iop_CmpEQ16x4: 2625 case Iop_QAdd16Sx4: 2626 case Iop_QAdd16Ux4: 2627 case Iop_QSal16x4: 2628 case Iop_QShl16x4: 2629 case Iop_Add16x4: 2630 case Iop_QDMulHi16Sx4: 2631 case Iop_QRDMulHi16Sx4: 2632 return binary16Ix4(mce, vatom1, vatom2); 2633 2634 case Iop_Sub32x2: 2635 case Iop_Mul32x2: 2636 case Iop_Max32Sx2: 2637 case Iop_Max32Ux2: 2638 case Iop_Min32Sx2: 2639 case Iop_Min32Ux2: 2640 case Iop_CmpGT32Sx2: 2641 case Iop_CmpGT32Ux2: 2642 case Iop_CmpEQ32x2: 2643 case Iop_Add32x2: 2644 case Iop_QAdd32Ux2: 2645 case Iop_QAdd32Sx2: 2646 case Iop_QSub32Ux2: 2647 case Iop_QSub32Sx2: 2648 case Iop_QSal32x2: 2649 case Iop_QShl32x2: 2650 case Iop_QDMulHi32Sx2: 2651 case Iop_QRDMulHi32Sx2: 2652 return binary32Ix2(mce, vatom1, vatom2); 2653 2654 case Iop_QSub64Ux1: 2655 case Iop_QSub64Sx1: 2656 case Iop_QAdd64Ux1: 2657 case Iop_QAdd64Sx1: 2658 case Iop_QSal64x1: 2659 case Iop_QShl64x1: 2660 case Iop_Sal64x1: 2661 return binary64Ix1(mce, vatom1, vatom2); 2662 2663 case Iop_QShlN8Sx8: 2664 case Iop_QShlN8x8: 2665 case Iop_QSalN8x8: 2666 complainIfUndefined(mce, atom2, NULL); 2667 return mkPCast8x8(mce, vatom1); 2668 2669 case Iop_QShlN16Sx4: 2670 case Iop_QShlN16x4: 2671 case Iop_QSalN16x4: 2672 complainIfUndefined(mce, atom2, NULL); 2673 return mkPCast16x4(mce, vatom1); 2674 2675 case Iop_QShlN32Sx2: 2676 case Iop_QShlN32x2: 2677 case Iop_QSalN32x2: 2678 complainIfUndefined(mce, atom2, NULL); 2679 return mkPCast32x2(mce, vatom1); 2680 2681 case Iop_QShlN64Sx1: 2682 case Iop_QShlN64x1: 2683 case Iop_QSalN64x1: 2684 complainIfUndefined(mce, atom2, NULL); 2685 return mkPCast32x2(mce, vatom1); 2686 2687 case Iop_PwMax32Sx2: 2688 case Iop_PwMax32Ux2: 2689 case Iop_PwMin32Sx2: 2690 case Iop_PwMin32Ux2: 2691 case Iop_PwMax32Fx2: 2692 case Iop_PwMin32Fx2: 2693 return assignNew('V', mce, Ity_I64, 2694 binop(Iop_PwMax32Ux2, 2695 mkPCast32x2(mce, vatom1), 2696 mkPCast32x2(mce, vatom2))); 2697 2698 case Iop_PwMax16Sx4: 2699 case Iop_PwMax16Ux4: 2700 case Iop_PwMin16Sx4: 2701 case Iop_PwMin16Ux4: 2702 return assignNew('V', mce, Ity_I64, 2703 binop(Iop_PwMax16Ux4, 2704 mkPCast16x4(mce, vatom1), 2705 mkPCast16x4(mce, vatom2))); 2706 2707 case Iop_PwMax8Sx8: 2708 case Iop_PwMax8Ux8: 2709 case Iop_PwMin8Sx8: 2710 case Iop_PwMin8Ux8: 2711 return assignNew('V', mce, Ity_I64, 2712 binop(Iop_PwMax8Ux8, 2713 mkPCast8x8(mce, vatom1), 2714 mkPCast8x8(mce, vatom2))); 2715 2716 case Iop_PwAdd32x2: 2717 case Iop_PwAdd32Fx2: 2718 return 
mkPCast32x2(mce, 2719 assignNew('V', mce, Ity_I64, 2720 binop(Iop_PwAdd32x2, 2721 mkPCast32x2(mce, vatom1), 2722 mkPCast32x2(mce, vatom2)))); 2723 2724 case Iop_PwAdd16x4: 2725 return mkPCast16x4(mce, 2726 assignNew('V', mce, Ity_I64, 2727 binop(op, mkPCast16x4(mce, vatom1), 2728 mkPCast16x4(mce, vatom2)))); 2729 2730 case Iop_PwAdd8x8: 2731 return mkPCast8x8(mce, 2732 assignNew('V', mce, Ity_I64, 2733 binop(op, mkPCast8x8(mce, vatom1), 2734 mkPCast8x8(mce, vatom2)))); 2735 2736 case Iop_Shl8x8: 2737 case Iop_Shr8x8: 2738 case Iop_Sar8x8: 2739 case Iop_Sal8x8: 2740 return mkUifU64(mce, 2741 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2742 mkPCast8x8(mce,vatom2) 2743 ); 2744 2745 case Iop_Shl16x4: 2746 case Iop_Shr16x4: 2747 case Iop_Sar16x4: 2748 case Iop_Sal16x4: 2749 return mkUifU64(mce, 2750 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2751 mkPCast16x4(mce,vatom2) 2752 ); 2753 2754 case Iop_Shl32x2: 2755 case Iop_Shr32x2: 2756 case Iop_Sar32x2: 2757 case Iop_Sal32x2: 2758 return mkUifU64(mce, 2759 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2760 mkPCast32x2(mce,vatom2) 2761 ); 2762 2763 /* 64-bit data-steering */ 2764 case Iop_InterleaveLO32x2: 2765 case Iop_InterleaveLO16x4: 2766 case Iop_InterleaveLO8x8: 2767 case Iop_InterleaveHI32x2: 2768 case Iop_InterleaveHI16x4: 2769 case Iop_InterleaveHI8x8: 2770 case Iop_CatOddLanes8x8: 2771 case Iop_CatEvenLanes8x8: 2772 case Iop_CatOddLanes16x4: 2773 case Iop_CatEvenLanes16x4: 2774 case Iop_InterleaveOddLanes8x8: 2775 case Iop_InterleaveEvenLanes8x8: 2776 case Iop_InterleaveOddLanes16x4: 2777 case Iop_InterleaveEvenLanes16x4: 2778 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 2779 2780 case Iop_GetElem8x8: 2781 complainIfUndefined(mce, atom2, NULL); 2782 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2783 case Iop_GetElem16x4: 2784 complainIfUndefined(mce, atom2, NULL); 2785 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2786 case Iop_GetElem32x2: 2787 complainIfUndefined(mce, atom2, NULL); 2788 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2789 2790 /* Perm8x8: rearrange values in left arg using steering values 2791 from right arg. So rearrange the vbits in the same way but 2792 pessimise wrt steering values. */ 2793 case Iop_Perm8x8: 2794 return mkUifU64( 2795 mce, 2796 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2797 mkPCast8x8(mce, vatom2) 2798 ); 2799 2800 /* V128-bit SIMD */ 2801 2802 case Iop_ShrN8x16: 2803 case Iop_ShrN16x8: 2804 case Iop_ShrN32x4: 2805 case Iop_ShrN64x2: 2806 case Iop_SarN8x16: 2807 case Iop_SarN16x8: 2808 case Iop_SarN32x4: 2809 case Iop_SarN64x2: 2810 case Iop_ShlN8x16: 2811 case Iop_ShlN16x8: 2812 case Iop_ShlN32x4: 2813 case Iop_ShlN64x2: 2814 /* Same scheme as with all other shifts. Note: 22 Oct 05: 2815 this is wrong now, scalar shifts are done properly lazily. 2816 Vector shifts should be fixed too. */ 2817 complainIfUndefined(mce, atom2, NULL); 2818 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 2819 2820 /* V x V shifts/rotates are done using the standard lazy scheme. 
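   That is, eg for Shl8x16 (sketch; q1, q2 are the shadows of the
   two args):

      Shl8x16(q1, atom2) `UifU` PCast8x16(q2)

   -- shift the V bits by the original per-lane amounts, then fold in,
   lane by lane, any undefinedness of the shift-amount vector.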
*/ 2821 case Iop_Shl8x16: 2822 case Iop_Shr8x16: 2823 case Iop_Sar8x16: 2824 case Iop_Sal8x16: 2825 case Iop_Rol8x16: 2826 return mkUifUV128(mce, 2827 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2828 mkPCast8x16(mce,vatom2) 2829 ); 2830 2831 case Iop_Shl16x8: 2832 case Iop_Shr16x8: 2833 case Iop_Sar16x8: 2834 case Iop_Sal16x8: 2835 case Iop_Rol16x8: 2836 return mkUifUV128(mce, 2837 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2838 mkPCast16x8(mce,vatom2) 2839 ); 2840 2841 case Iop_Shl32x4: 2842 case Iop_Shr32x4: 2843 case Iop_Sar32x4: 2844 case Iop_Sal32x4: 2845 case Iop_Rol32x4: 2846 return mkUifUV128(mce, 2847 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2848 mkPCast32x4(mce,vatom2) 2849 ); 2850 2851 case Iop_Shl64x2: 2852 case Iop_Shr64x2: 2853 case Iop_Sar64x2: 2854 case Iop_Sal64x2: 2855 return mkUifUV128(mce, 2856 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2857 mkPCast64x2(mce,vatom2) 2858 ); 2859 2860 case Iop_F32ToFixed32Ux4_RZ: 2861 case Iop_F32ToFixed32Sx4_RZ: 2862 case Iop_Fixed32UToF32x4_RN: 2863 case Iop_Fixed32SToF32x4_RN: 2864 complainIfUndefined(mce, atom2, NULL); 2865 return mkPCast32x4(mce, vatom1); 2866 2867 case Iop_F32ToFixed32Ux2_RZ: 2868 case Iop_F32ToFixed32Sx2_RZ: 2869 case Iop_Fixed32UToF32x2_RN: 2870 case Iop_Fixed32SToF32x2_RN: 2871 complainIfUndefined(mce, atom2, NULL); 2872 return mkPCast32x2(mce, vatom1); 2873 2874 case Iop_QSub8Ux16: 2875 case Iop_QSub8Sx16: 2876 case Iop_Sub8x16: 2877 case Iop_Min8Ux16: 2878 case Iop_Min8Sx16: 2879 case Iop_Max8Ux16: 2880 case Iop_Max8Sx16: 2881 case Iop_CmpGT8Sx16: 2882 case Iop_CmpGT8Ux16: 2883 case Iop_CmpEQ8x16: 2884 case Iop_Avg8Ux16: 2885 case Iop_Avg8Sx16: 2886 case Iop_QAdd8Ux16: 2887 case Iop_QAdd8Sx16: 2888 case Iop_QSal8x16: 2889 case Iop_QShl8x16: 2890 case Iop_Add8x16: 2891 case Iop_Mul8x16: 2892 case Iop_PolynomialMul8x16: 2893 return binary8Ix16(mce, vatom1, vatom2); 2894 2895 case Iop_QSub16Ux8: 2896 case Iop_QSub16Sx8: 2897 case Iop_Sub16x8: 2898 case Iop_Mul16x8: 2899 case Iop_MulHi16Sx8: 2900 case Iop_MulHi16Ux8: 2901 case Iop_Min16Sx8: 2902 case Iop_Min16Ux8: 2903 case Iop_Max16Sx8: 2904 case Iop_Max16Ux8: 2905 case Iop_CmpGT16Sx8: 2906 case Iop_CmpGT16Ux8: 2907 case Iop_CmpEQ16x8: 2908 case Iop_Avg16Ux8: 2909 case Iop_Avg16Sx8: 2910 case Iop_QAdd16Ux8: 2911 case Iop_QAdd16Sx8: 2912 case Iop_QSal16x8: 2913 case Iop_QShl16x8: 2914 case Iop_Add16x8: 2915 case Iop_QDMulHi16Sx8: 2916 case Iop_QRDMulHi16Sx8: 2917 return binary16Ix8(mce, vatom1, vatom2); 2918 2919 case Iop_Sub32x4: 2920 case Iop_CmpGT32Sx4: 2921 case Iop_CmpGT32Ux4: 2922 case Iop_CmpEQ32x4: 2923 case Iop_QAdd32Sx4: 2924 case Iop_QAdd32Ux4: 2925 case Iop_QSub32Sx4: 2926 case Iop_QSub32Ux4: 2927 case Iop_QSal32x4: 2928 case Iop_QShl32x4: 2929 case Iop_Avg32Ux4: 2930 case Iop_Avg32Sx4: 2931 case Iop_Add32x4: 2932 case Iop_Max32Ux4: 2933 case Iop_Max32Sx4: 2934 case Iop_Min32Ux4: 2935 case Iop_Min32Sx4: 2936 case Iop_Mul32x4: 2937 case Iop_QDMulHi32Sx4: 2938 case Iop_QRDMulHi32Sx4: 2939 return binary32Ix4(mce, vatom1, vatom2); 2940 2941 case Iop_Sub64x2: 2942 case Iop_Add64x2: 2943 case Iop_CmpEQ64x2: 2944 case Iop_CmpGT64Sx2: 2945 case Iop_QSal64x2: 2946 case Iop_QShl64x2: 2947 case Iop_QAdd64Ux2: 2948 case Iop_QAdd64Sx2: 2949 case Iop_QSub64Ux2: 2950 case Iop_QSub64Sx2: 2951 return binary64Ix2(mce, vatom1, vatom2); 2952 2953 case Iop_QNarrowBin32Sto16Sx8: 2954 case Iop_QNarrowBin32Uto16Ux8: 2955 case Iop_QNarrowBin32Sto16Ux8: 2956 case Iop_QNarrowBin16Sto8Sx16: 2957 case 
Iop_QNarrowBin16Uto8Ux16: 2958 case Iop_QNarrowBin16Sto8Ux16: 2959 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 2960 2961 case Iop_Sub64Fx2: 2962 case Iop_Mul64Fx2: 2963 case Iop_Min64Fx2: 2964 case Iop_Max64Fx2: 2965 case Iop_Div64Fx2: 2966 case Iop_CmpLT64Fx2: 2967 case Iop_CmpLE64Fx2: 2968 case Iop_CmpEQ64Fx2: 2969 case Iop_CmpUN64Fx2: 2970 case Iop_Add64Fx2: 2971 return binary64Fx2(mce, vatom1, vatom2); 2972 2973 case Iop_Sub64F0x2: 2974 case Iop_Mul64F0x2: 2975 case Iop_Min64F0x2: 2976 case Iop_Max64F0x2: 2977 case Iop_Div64F0x2: 2978 case Iop_CmpLT64F0x2: 2979 case Iop_CmpLE64F0x2: 2980 case Iop_CmpEQ64F0x2: 2981 case Iop_CmpUN64F0x2: 2982 case Iop_Add64F0x2: 2983 return binary64F0x2(mce, vatom1, vatom2); 2984 2985 case Iop_Sub32Fx4: 2986 case Iop_Mul32Fx4: 2987 case Iop_Min32Fx4: 2988 case Iop_Max32Fx4: 2989 case Iop_Div32Fx4: 2990 case Iop_CmpLT32Fx4: 2991 case Iop_CmpLE32Fx4: 2992 case Iop_CmpEQ32Fx4: 2993 case Iop_CmpUN32Fx4: 2994 case Iop_CmpGT32Fx4: 2995 case Iop_CmpGE32Fx4: 2996 case Iop_Add32Fx4: 2997 case Iop_Recps32Fx4: 2998 case Iop_Rsqrts32Fx4: 2999 return binary32Fx4(mce, vatom1, vatom2); 3000 3001 case Iop_Sub32Fx2: 3002 case Iop_Mul32Fx2: 3003 case Iop_Min32Fx2: 3004 case Iop_Max32Fx2: 3005 case Iop_CmpEQ32Fx2: 3006 case Iop_CmpGT32Fx2: 3007 case Iop_CmpGE32Fx2: 3008 case Iop_Add32Fx2: 3009 case Iop_Recps32Fx2: 3010 case Iop_Rsqrts32Fx2: 3011 return binary32Fx2(mce, vatom1, vatom2); 3012 3013 case Iop_Sub32F0x4: 3014 case Iop_Mul32F0x4: 3015 case Iop_Min32F0x4: 3016 case Iop_Max32F0x4: 3017 case Iop_Div32F0x4: 3018 case Iop_CmpLT32F0x4: 3019 case Iop_CmpLE32F0x4: 3020 case Iop_CmpEQ32F0x4: 3021 case Iop_CmpUN32F0x4: 3022 case Iop_Add32F0x4: 3023 return binary32F0x4(mce, vatom1, vatom2); 3024 3025 case Iop_QShlN8Sx16: 3026 case Iop_QShlN8x16: 3027 case Iop_QSalN8x16: 3028 complainIfUndefined(mce, atom2, NULL); 3029 return mkPCast8x16(mce, vatom1); 3030 3031 case Iop_QShlN16Sx8: 3032 case Iop_QShlN16x8: 3033 case Iop_QSalN16x8: 3034 complainIfUndefined(mce, atom2, NULL); 3035 return mkPCast16x8(mce, vatom1); 3036 3037 case Iop_QShlN32Sx4: 3038 case Iop_QShlN32x4: 3039 case Iop_QSalN32x4: 3040 complainIfUndefined(mce, atom2, NULL); 3041 return mkPCast32x4(mce, vatom1); 3042 3043 case Iop_QShlN64Sx2: 3044 case Iop_QShlN64x2: 3045 case Iop_QSalN64x2: 3046 complainIfUndefined(mce, atom2, NULL); 3047 return mkPCast32x4(mce, vatom1); 3048 3049 case Iop_Mull32Sx2: 3050 case Iop_Mull32Ux2: 3051 case Iop_QDMulLong32Sx2: 3052 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 3053 mkUifU64(mce, vatom1, vatom2)); 3054 3055 case Iop_Mull16Sx4: 3056 case Iop_Mull16Ux4: 3057 case Iop_QDMulLong16Sx4: 3058 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 3059 mkUifU64(mce, vatom1, vatom2)); 3060 3061 case Iop_Mull8Sx8: 3062 case Iop_Mull8Ux8: 3063 case Iop_PolynomialMull8x8: 3064 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 3065 mkUifU64(mce, vatom1, vatom2)); 3066 3067 case Iop_PwAdd32x4: 3068 return mkPCast32x4(mce, 3069 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 3070 mkPCast32x4(mce, vatom2)))); 3071 3072 case Iop_PwAdd16x8: 3073 return mkPCast16x8(mce, 3074 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 3075 mkPCast16x8(mce, vatom2)))); 3076 3077 case Iop_PwAdd8x16: 3078 return mkPCast8x16(mce, 3079 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 3080 mkPCast8x16(mce, vatom2)))); 3081 3082 /* V128-bit data-steering */ 3083 case Iop_SetV128lo32: 3084 case Iop_SetV128lo64: 3085 case Iop_64HLtoV128: 3086 case 
Iop_InterleaveLO64x2: 3087 case Iop_InterleaveLO32x4: 3088 case Iop_InterleaveLO16x8: 3089 case Iop_InterleaveLO8x16: 3090 case Iop_InterleaveHI64x2: 3091 case Iop_InterleaveHI32x4: 3092 case Iop_InterleaveHI16x8: 3093 case Iop_InterleaveHI8x16: 3094 case Iop_CatOddLanes8x16: 3095 case Iop_CatOddLanes16x8: 3096 case Iop_CatOddLanes32x4: 3097 case Iop_CatEvenLanes8x16: 3098 case Iop_CatEvenLanes16x8: 3099 case Iop_CatEvenLanes32x4: 3100 case Iop_InterleaveOddLanes8x16: 3101 case Iop_InterleaveOddLanes16x8: 3102 case Iop_InterleaveOddLanes32x4: 3103 case Iop_InterleaveEvenLanes8x16: 3104 case Iop_InterleaveEvenLanes16x8: 3105 case Iop_InterleaveEvenLanes32x4: 3106 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 3107 3108 case Iop_GetElem8x16: 3109 complainIfUndefined(mce, atom2, NULL); 3110 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3111 case Iop_GetElem16x8: 3112 complainIfUndefined(mce, atom2, NULL); 3113 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3114 case Iop_GetElem32x4: 3115 complainIfUndefined(mce, atom2, NULL); 3116 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3117 case Iop_GetElem64x2: 3118 complainIfUndefined(mce, atom2, NULL); 3119 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3120 3121 /* Perm8x16: rearrange values in left arg using steering values 3122 from right arg. So rearrange the vbits in the same way but 3123 pessimise wrt steering values. Perm32x4 ditto. */ 3124 case Iop_Perm8x16: 3125 return mkUifUV128( 3126 mce, 3127 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3128 mkPCast8x16(mce, vatom2) 3129 ); 3130 case Iop_Perm32x4: 3131 return mkUifUV128( 3132 mce, 3133 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3134 mkPCast32x4(mce, vatom2) 3135 ); 3136 3137 /* These two take the lower half of each 16-bit lane, sign/zero 3138 extend it to 32, and multiply together, producing a 32x4 3139 result (and implicitly ignoring half the operand bits). So 3140 treat it as a bunch of independent 16x8 operations, but then 3141 do 32-bit shifts left-right to copy the lower half results 3142 (which are all 0s or all 1s due to PCasting in binary16Ix8) 3143 into the upper half of each result lane. */ 3144 case Iop_MullEven16Ux8: 3145 case Iop_MullEven16Sx8: { 3146 IRAtom* at; 3147 at = binary16Ix8(mce,vatom1,vatom2); 3148 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 3149 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 3150 return at; 3151 } 3152 3153 /* Same deal as Iop_MullEven16{S,U}x8 */ 3154 case Iop_MullEven8Ux16: 3155 case Iop_MullEven8Sx16: { 3156 IRAtom* at; 3157 at = binary8Ix16(mce,vatom1,vatom2); 3158 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 3159 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 3160 return at; 3161 } 3162 3163 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 3164 32x4 -> 16x8 laneage, discarding the upper half of each lane. 3165 Simply apply same op to the V bits, since this really no more 3166 than a data steering operation. */ 3167 case Iop_NarrowBin32to16x8: 3168 case Iop_NarrowBin16to8x16: 3169 return assignNew('V', mce, Ity_V128, 3170 binop(op, vatom1, vatom2)); 3171 3172 case Iop_ShrV128: 3173 case Iop_ShlV128: 3174 /* Same scheme as with all other shifts. Note: 10 Nov 05: 3175 this is wrong now, scalar shifts are done properly lazily. 3176 Vector shifts should be fixed too. 
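   (A lazily-correct version would presumably mirror scalarShift above,
   something like ShlV128(q1, atom2) `UifU` PCast-to-V128(q2); that is
   only a sketch of a possible fix, not what is generated here.)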
*/ 3177 complainIfUndefined(mce, atom2, NULL); 3178 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3179 3180 /* I128-bit data-steering */ 3181 case Iop_64HLto128: 3182 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 3183 3184 /* V256-bit SIMD */ 3185 3186 case Iop_Add64Fx4: 3187 case Iop_Sub64Fx4: 3188 case Iop_Mul64Fx4: 3189 case Iop_Div64Fx4: 3190 case Iop_Max64Fx4: 3191 case Iop_Min64Fx4: 3192 return binary64Fx4(mce, vatom1, vatom2); 3193 3194 case Iop_Add32Fx8: 3195 case Iop_Sub32Fx8: 3196 case Iop_Mul32Fx8: 3197 case Iop_Div32Fx8: 3198 case Iop_Max32Fx8: 3199 case Iop_Min32Fx8: 3200 return binary32Fx8(mce, vatom1, vatom2); 3201 3202 /* V256-bit data-steering */ 3203 case Iop_V128HLtoV256: 3204 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2)); 3205 3206 /* Scalar floating point */ 3207 3208 case Iop_F32toI64S: 3209 /* I32(rm) x F32 -> I64 */ 3210 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3211 3212 case Iop_I64StoF32: 3213 /* I32(rm) x I64 -> F32 */ 3214 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3215 3216 case Iop_RoundF64toInt: 3217 case Iop_RoundF64toF32: 3218 case Iop_F64toI64S: 3219 case Iop_F64toI64U: 3220 case Iop_I64StoF64: 3221 case Iop_I64UtoF64: 3222 case Iop_SinF64: 3223 case Iop_CosF64: 3224 case Iop_TanF64: 3225 case Iop_2xm1F64: 3226 case Iop_SqrtF64: 3227 /* I32(rm) x I64/F64 -> I64/F64 */ 3228 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3229 3230 case Iop_ShlD64: 3231 case Iop_ShrD64: 3232 case Iop_RoundD64toInt: 3233 /* I32(DFP rm) x D64 -> D64 */ 3234 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3235 3236 case Iop_ShlD128: 3237 case Iop_ShrD128: 3238 case Iop_RoundD128toInt: 3239 /* I32(DFP rm) x D128 -> D128 */ 3240 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3241 3242 case Iop_D64toI64S: 3243 case Iop_I64StoD64: 3244 /* I64(DFP rm) x I64 -> D64 */ 3245 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3246 3247 case Iop_RoundF32toInt: 3248 case Iop_SqrtF32: 3249 /* I32(rm) x I32/F32 -> I32/F32 */ 3250 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3251 3252 case Iop_SqrtF128: 3253 /* I32(rm) x F128 -> F128 */ 3254 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3255 3256 case Iop_I32StoF32: 3257 case Iop_F32toI32S: 3258 /* First arg is I32 (rounding mode), second is F32/I32 (data). */ 3259 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3260 3261 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */ 3262 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */ 3263 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3264 3265 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */ 3266 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */ 3267 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */ 3268 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */ 3269 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3270 3271 case Iop_F64HLtoF128: 3272 case Iop_D64HLtoD128: 3273 return assignNew('V', mce, Ity_I128, 3274 binop(Iop_64HLto128, vatom1, vatom2)); 3275 3276 case Iop_F64toI32U: 3277 case Iop_F64toI32S: 3278 case Iop_F64toF32: 3279 case Iop_I64UtoF32: 3280 /* First arg is I32 (rounding mode), second is F64 (data). */ 3281 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3282 3283 case Iop_D64toD32: 3284 /* First arg is I64 (DFProunding mode), second is D64 (data). */ 3285 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3286 3287 case Iop_F64toI16S: 3288 /* First arg is I32 (rounding mode), second is F64 (data). 
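   In other words (sketch): the definedness of the rounding mode and of
   the F64 data is merged and then smeared across the 16-bit shadow
   result by mkLazy2.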
*/ 3289 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3290 3291 case Iop_InsertExpD64: 3292 /* I64 x I64 -> D64 */ 3293 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3294 3295 case Iop_InsertExpD128: 3296 /* I64 x I128 -> D128 */ 3297 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3298 3299 case Iop_CmpF32: 3300 case Iop_CmpF64: 3301 case Iop_CmpF128: 3302 case Iop_CmpD64: 3303 case Iop_CmpD128: 3304 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3305 3306 /* non-FP after here */ 3307 3308 case Iop_DivModU64to32: 3309 case Iop_DivModS64to32: 3310 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3311 3312 case Iop_DivModU128to64: 3313 case Iop_DivModS128to64: 3314 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3315 3316 case Iop_16HLto32: 3317 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 3318 case Iop_32HLto64: 3319 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3320 3321 case Iop_DivModS64to64: 3322 case Iop_MullS64: 3323 case Iop_MullU64: { 3324 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3325 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 3326 return assignNew('V', mce, Ity_I128, 3327 binop(Iop_64HLto128, vHi64, vLo64)); 3328 } 3329 3330 case Iop_MullS32: 3331 case Iop_MullU32: { 3332 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3333 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 3334 return assignNew('V', mce, Ity_I64, 3335 binop(Iop_32HLto64, vHi32, vLo32)); 3336 } 3337 3338 case Iop_MullS16: 3339 case Iop_MullU16: { 3340 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3341 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 3342 return assignNew('V', mce, Ity_I32, 3343 binop(Iop_16HLto32, vHi16, vLo16)); 3344 } 3345 3346 case Iop_MullS8: 3347 case Iop_MullU8: { 3348 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3349 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 3350 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 3351 } 3352 3353 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. 
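   (As it stands, mkLazy2 means that any undefined bit in either
   operand makes the entire 32-bit result undefined.)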
*/ 3354 case Iop_DivS32: 3355 case Iop_DivU32: 3356 case Iop_DivU32E: 3357 case Iop_DivS32E: 3358 case Iop_QAdd32S: /* could probably do better */ 3359 case Iop_QSub32S: /* could probably do better */ 3360 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3361 3362 case Iop_DivS64: 3363 case Iop_DivU64: 3364 case Iop_DivS64E: 3365 case Iop_DivU64E: 3366 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3367 3368 case Iop_Add32: 3369 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3370 return expensiveAddSub(mce,True,Ity_I32, 3371 vatom1,vatom2, atom1,atom2); 3372 else 3373 goto cheap_AddSub32; 3374 case Iop_Sub32: 3375 if (mce->bogusLiterals) 3376 return expensiveAddSub(mce,False,Ity_I32, 3377 vatom1,vatom2, atom1,atom2); 3378 else 3379 goto cheap_AddSub32; 3380 3381 cheap_AddSub32: 3382 case Iop_Mul32: 3383 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3384 3385 case Iop_CmpORD32S: 3386 case Iop_CmpORD32U: 3387 case Iop_CmpORD64S: 3388 case Iop_CmpORD64U: 3389 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 3390 3391 case Iop_Add64: 3392 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3393 return expensiveAddSub(mce,True,Ity_I64, 3394 vatom1,vatom2, atom1,atom2); 3395 else 3396 goto cheap_AddSub64; 3397 case Iop_Sub64: 3398 if (mce->bogusLiterals) 3399 return expensiveAddSub(mce,False,Ity_I64, 3400 vatom1,vatom2, atom1,atom2); 3401 else 3402 goto cheap_AddSub64; 3403 3404 cheap_AddSub64: 3405 case Iop_Mul64: 3406 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3407 3408 case Iop_Mul16: 3409 case Iop_Add16: 3410 case Iop_Sub16: 3411 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3412 3413 case Iop_Sub8: 3414 case Iop_Add8: 3415 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3416 3417 case Iop_CmpEQ64: 3418 case Iop_CmpNE64: 3419 if (mce->bogusLiterals) 3420 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 3421 else 3422 goto cheap_cmp64; 3423 cheap_cmp64: 3424 case Iop_CmpLE64S: case Iop_CmpLE64U: 3425 case Iop_CmpLT64U: case Iop_CmpLT64S: 3426 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 3427 3428 case Iop_CmpEQ32: 3429 case Iop_CmpNE32: 3430 if (mce->bogusLiterals) 3431 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 3432 else 3433 goto cheap_cmp32; 3434 cheap_cmp32: 3435 case Iop_CmpLE32S: case Iop_CmpLE32U: 3436 case Iop_CmpLT32U: case Iop_CmpLT32S: 3437 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 3438 3439 case Iop_CmpEQ16: case Iop_CmpNE16: 3440 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 3441 3442 case Iop_CmpEQ8: case Iop_CmpNE8: 3443 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 3444 3445 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 3446 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 3447 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 3448 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 3449 /* Just say these all produce a defined result, regardless 3450 of their arguments. See COMMENT_ON_CasCmpEQ in this file. 
*/ 3451 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 3452 3453 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 3454 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 3455 3456 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 3457 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 3458 3459 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 3460 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 3461 3462 case Iop_Shl8: case Iop_Shr8: 3463 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 3464 3465 case Iop_AndV256: 3466 uifu = mkUifUV256; difd = mkDifDV256; 3467 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or; 3468 case Iop_AndV128: 3469 uifu = mkUifUV128; difd = mkDifDV128; 3470 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 3471 case Iop_And64: 3472 uifu = mkUifU64; difd = mkDifD64; 3473 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 3474 case Iop_And32: 3475 uifu = mkUifU32; difd = mkDifD32; 3476 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 3477 case Iop_And16: 3478 uifu = mkUifU16; difd = mkDifD16; 3479 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 3480 case Iop_And8: 3481 uifu = mkUifU8; difd = mkDifD8; 3482 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 3483 3484 case Iop_OrV256: 3485 uifu = mkUifUV256; difd = mkDifDV256; 3486 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or; 3487 case Iop_OrV128: 3488 uifu = mkUifUV128; difd = mkDifDV128; 3489 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 3490 case Iop_Or64: 3491 uifu = mkUifU64; difd = mkDifD64; 3492 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 3493 case Iop_Or32: 3494 uifu = mkUifU32; difd = mkDifD32; 3495 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 3496 case Iop_Or16: 3497 uifu = mkUifU16; difd = mkDifD16; 3498 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 3499 case Iop_Or8: 3500 uifu = mkUifU8; difd = mkDifD8; 3501 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 3502 3503 do_And_Or: 3504 return 3505 assignNew( 3506 'V', mce, 3507 and_or_ty, 3508 difd(mce, uifu(mce, vatom1, vatom2), 3509 difd(mce, improve(mce, atom1, vatom1), 3510 improve(mce, atom2, vatom2) ) ) ); 3511 3512 case Iop_Xor8: 3513 return mkUifU8(mce, vatom1, vatom2); 3514 case Iop_Xor16: 3515 return mkUifU16(mce, vatom1, vatom2); 3516 case Iop_Xor32: 3517 return mkUifU32(mce, vatom1, vatom2); 3518 case Iop_Xor64: 3519 return mkUifU64(mce, vatom1, vatom2); 3520 case Iop_XorV128: 3521 return mkUifUV128(mce, vatom1, vatom2); 3522 case Iop_XorV256: 3523 return mkUifUV256(mce, vatom1, vatom2); 3524 3525 default: 3526 ppIROp(op); 3527 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 3528 } 3529} 3530 3531 3532static 3533IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 3534{ 3535 IRAtom* vatom = expr2vbits( mce, atom ); 3536 tl_assert(isOriginalAtom(mce,atom)); 3537 switch (op) { 3538 3539 case Iop_Sqrt64Fx2: 3540 return unary64Fx2(mce, vatom); 3541 3542 case Iop_Sqrt64F0x2: 3543 return unary64F0x2(mce, vatom); 3544 3545 case Iop_Sqrt32Fx8: 3546 case Iop_RSqrt32Fx8: 3547 case Iop_Recip32Fx8: 3548 return unary32Fx8(mce, vatom); 3549 3550 case Iop_Sqrt64Fx4: 3551 return unary64Fx4(mce, vatom); 3552 3553 case Iop_Sqrt32Fx4: 3554 case Iop_RSqrt32Fx4: 3555 case Iop_Recip32Fx4: 3556 case Iop_I32UtoFx4: 3557 case Iop_I32StoFx4: 3558 case Iop_QFtoI32Ux4_RZ: 3559 case Iop_QFtoI32Sx4_RZ: 3560 case 
Iop_RoundF32x4_RM: 3561 case Iop_RoundF32x4_RP: 3562 case Iop_RoundF32x4_RN: 3563 case Iop_RoundF32x4_RZ: 3564 case Iop_Recip32x4: 3565 case Iop_Abs32Fx4: 3566 case Iop_Neg32Fx4: 3567 case Iop_Rsqrte32Fx4: 3568 return unary32Fx4(mce, vatom); 3569 3570 case Iop_I32UtoFx2: 3571 case Iop_I32StoFx2: 3572 case Iop_Recip32Fx2: 3573 case Iop_Recip32x2: 3574 case Iop_Abs32Fx2: 3575 case Iop_Neg32Fx2: 3576 case Iop_Rsqrte32Fx2: 3577 return unary32Fx2(mce, vatom); 3578 3579 case Iop_Sqrt32F0x4: 3580 case Iop_RSqrt32F0x4: 3581 case Iop_Recip32F0x4: 3582 return unary32F0x4(mce, vatom); 3583 3584 case Iop_32UtoV128: 3585 case Iop_64UtoV128: 3586 case Iop_Dup8x16: 3587 case Iop_Dup16x8: 3588 case Iop_Dup32x4: 3589 case Iop_Reverse16_8x16: 3590 case Iop_Reverse32_8x16: 3591 case Iop_Reverse32_16x8: 3592 case Iop_Reverse64_8x16: 3593 case Iop_Reverse64_16x8: 3594 case Iop_Reverse64_32x4: 3595 case Iop_V256toV128_1: case Iop_V256toV128_0: 3596 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 3597 3598 case Iop_F128HItoF64: /* F128 -> high half of F128 */ 3599 case Iop_D128HItoD64: /* D128 -> high half of D128 */ 3600 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom)); 3601 case Iop_F128LOtoF64: /* F128 -> low half of F128 */ 3602 case Iop_D128LOtoD64: /* D128 -> low half of D128 */ 3603 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom)); 3604 3605 case Iop_NegF128: 3606 case Iop_AbsF128: 3607 return mkPCastTo(mce, Ity_I128, vatom); 3608 3609 case Iop_I32StoF128: /* signed I32 -> F128 */ 3610 case Iop_I64StoF128: /* signed I64 -> F128 */ 3611 case Iop_F32toF128: /* F32 -> F128 */ 3612 case Iop_F64toF128: /* F64 -> F128 */ 3613 case Iop_I64StoD128: /* signed I64 -> D128 */ 3614 return mkPCastTo(mce, Ity_I128, vatom); 3615 3616 case Iop_F32toF64: 3617 case Iop_I32StoF64: 3618 case Iop_I32UtoF64: 3619 case Iop_NegF64: 3620 case Iop_AbsF64: 3621 case Iop_Est5FRSqrt: 3622 case Iop_RoundF64toF64_NEAREST: 3623 case Iop_RoundF64toF64_NegINF: 3624 case Iop_RoundF64toF64_PosINF: 3625 case Iop_RoundF64toF64_ZERO: 3626 case Iop_Clz64: 3627 case Iop_Ctz64: 3628 case Iop_D32toD64: 3629 case Iop_ExtractExpD64: /* D64 -> I64 */ 3630 case Iop_ExtractExpD128: /* D128 -> I64 */ 3631 return mkPCastTo(mce, Ity_I64, vatom); 3632 3633 case Iop_D64toD128: 3634 return mkPCastTo(mce, Ity_I128, vatom); 3635 3636 case Iop_Clz32: 3637 case Iop_Ctz32: 3638 case Iop_TruncF64asF32: 3639 case Iop_NegF32: 3640 case Iop_AbsF32: 3641 return mkPCastTo(mce, Ity_I32, vatom); 3642 3643 case Iop_1Uto64: 3644 case Iop_1Sto64: 3645 case Iop_8Uto64: 3646 case Iop_8Sto64: 3647 case Iop_16Uto64: 3648 case Iop_16Sto64: 3649 case Iop_32Sto64: 3650 case Iop_32Uto64: 3651 case Iop_V128to64: 3652 case Iop_V128HIto64: 3653 case Iop_128HIto64: 3654 case Iop_128to64: 3655 case Iop_Dup8x8: 3656 case Iop_Dup16x4: 3657 case Iop_Dup32x2: 3658 case Iop_Reverse16_8x8: 3659 case Iop_Reverse32_8x8: 3660 case Iop_Reverse32_16x4: 3661 case Iop_Reverse64_8x8: 3662 case Iop_Reverse64_16x4: 3663 case Iop_Reverse64_32x2: 3664 case Iop_V256to64_0: case Iop_V256to64_1: 3665 case Iop_V256to64_2: case Iop_V256to64_3: 3666 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 3667 3668 case Iop_64to32: 3669 case Iop_64HIto32: 3670 case Iop_1Uto32: 3671 case Iop_1Sto32: 3672 case Iop_8Uto32: 3673 case Iop_16Uto32: 3674 case Iop_16Sto32: 3675 case Iop_8Sto32: 3676 case Iop_V128to32: 3677 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 3678 3679 case Iop_8Sto16: 3680 case Iop_8Uto16: 3681 case Iop_32to16: 3682 case Iop_32HIto16: 3683 case 
Iop_64to16: 3684 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 3685 3686 case Iop_1Uto8: 3687 case Iop_1Sto8: 3688 case Iop_16to8: 3689 case Iop_16HIto8: 3690 case Iop_32to8: 3691 case Iop_64to8: 3692 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 3693 3694 case Iop_32to1: 3695 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 3696 3697 case Iop_64to1: 3698 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 3699 3700 case Iop_ReinterpF64asI64: 3701 case Iop_ReinterpI64asF64: 3702 case Iop_ReinterpI32asF32: 3703 case Iop_ReinterpF32asI32: 3704 case Iop_ReinterpI64asD64: 3705 case Iop_ReinterpD64asI64: 3706 case Iop_DPBtoBCD: 3707 case Iop_BCDtoDPB: 3708 case Iop_NotV256: 3709 case Iop_NotV128: 3710 case Iop_Not64: 3711 case Iop_Not32: 3712 case Iop_Not16: 3713 case Iop_Not8: 3714 case Iop_Not1: 3715 return vatom; 3716 3717 case Iop_CmpNEZ8x8: 3718 case Iop_Cnt8x8: 3719 case Iop_Clz8Sx8: 3720 case Iop_Cls8Sx8: 3721 case Iop_Abs8x8: 3722 return mkPCast8x8(mce, vatom); 3723 3724 case Iop_CmpNEZ8x16: 3725 case Iop_Cnt8x16: 3726 case Iop_Clz8Sx16: 3727 case Iop_Cls8Sx16: 3728 case Iop_Abs8x16: 3729 return mkPCast8x16(mce, vatom); 3730 3731 case Iop_CmpNEZ16x4: 3732 case Iop_Clz16Sx4: 3733 case Iop_Cls16Sx4: 3734 case Iop_Abs16x4: 3735 return mkPCast16x4(mce, vatom); 3736 3737 case Iop_CmpNEZ16x8: 3738 case Iop_Clz16Sx8: 3739 case Iop_Cls16Sx8: 3740 case Iop_Abs16x8: 3741 return mkPCast16x8(mce, vatom); 3742 3743 case Iop_CmpNEZ32x2: 3744 case Iop_Clz32Sx2: 3745 case Iop_Cls32Sx2: 3746 case Iop_FtoI32Ux2_RZ: 3747 case Iop_FtoI32Sx2_RZ: 3748 case Iop_Abs32x2: 3749 return mkPCast32x2(mce, vatom); 3750 3751 case Iop_CmpNEZ32x4: 3752 case Iop_Clz32Sx4: 3753 case Iop_Cls32Sx4: 3754 case Iop_FtoI32Ux4_RZ: 3755 case Iop_FtoI32Sx4_RZ: 3756 case Iop_Abs32x4: 3757 return mkPCast32x4(mce, vatom); 3758 3759 case Iop_CmpwNEZ64: 3760 return mkPCastTo(mce, Ity_I64, vatom); 3761 3762 case Iop_CmpNEZ64x2: 3763 return mkPCast64x2(mce, vatom); 3764 3765 case Iop_NarrowUn16to8x8: 3766 case Iop_NarrowUn32to16x4: 3767 case Iop_NarrowUn64to32x2: 3768 case Iop_QNarrowUn16Sto8Sx8: 3769 case Iop_QNarrowUn16Sto8Ux8: 3770 case Iop_QNarrowUn16Uto8Ux8: 3771 case Iop_QNarrowUn32Sto16Sx4: 3772 case Iop_QNarrowUn32Sto16Ux4: 3773 case Iop_QNarrowUn32Uto16Ux4: 3774 case Iop_QNarrowUn64Sto32Sx2: 3775 case Iop_QNarrowUn64Sto32Ux2: 3776 case Iop_QNarrowUn64Uto32Ux2: 3777 return vectorNarrowUnV128(mce, op, vatom); 3778 3779 case Iop_Widen8Sto16x8: 3780 case Iop_Widen8Uto16x8: 3781 case Iop_Widen16Sto32x4: 3782 case Iop_Widen16Uto32x4: 3783 case Iop_Widen32Sto64x2: 3784 case Iop_Widen32Uto64x2: 3785 return vectorWidenI64(mce, op, vatom); 3786 3787 case Iop_PwAddL32Ux2: 3788 case Iop_PwAddL32Sx2: 3789 return mkPCastTo(mce, Ity_I64, 3790 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 3791 3792 case Iop_PwAddL16Ux4: 3793 case Iop_PwAddL16Sx4: 3794 return mkPCast32x2(mce, 3795 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 3796 3797 case Iop_PwAddL8Ux8: 3798 case Iop_PwAddL8Sx8: 3799 return mkPCast16x4(mce, 3800 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 3801 3802 case Iop_PwAddL32Ux4: 3803 case Iop_PwAddL32Sx4: 3804 return mkPCast64x2(mce, 3805 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 3806 3807 case Iop_PwAddL16Ux8: 3808 case Iop_PwAddL16Sx8: 3809 return mkPCast32x4(mce, 3810 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 3811 3812 case Iop_PwAddL8Ux16: 3813 case Iop_PwAddL8Sx16: 3814 return 
mkPCast16x8(mce, 3815 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 3816 3817 case Iop_I64UtoF32: 3818 default: 3819 ppIROp(op); 3820 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 3821 } 3822} 3823 3824 3825/* Worker function; do not call directly. */ 3826static 3827IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 3828 IREndness end, IRType ty, 3829 IRAtom* addr, UInt bias ) 3830{ 3831 void* helper; 3832 Char* hname; 3833 IRDirty* di; 3834 IRTemp datavbits; 3835 IRAtom* addrAct; 3836 3837 tl_assert(isOriginalAtom(mce,addr)); 3838 tl_assert(end == Iend_LE || end == Iend_BE); 3839 3840 /* First, emit a definedness test for the address. This also sets 3841 the address (shadow) to 'defined' following the test. */ 3842 complainIfUndefined( mce, addr, NULL ); 3843 3844 /* Now cook up a call to the relevant helper function, to read the 3845 data V bits from shadow memory. */ 3846 ty = shadowTypeV(ty); 3847 3848 if (end == Iend_LE) { 3849 switch (ty) { 3850 case Ity_I64: helper = &MC_(helperc_LOADV64le); 3851 hname = "MC_(helperc_LOADV64le)"; 3852 break; 3853 case Ity_I32: helper = &MC_(helperc_LOADV32le); 3854 hname = "MC_(helperc_LOADV32le)"; 3855 break; 3856 case Ity_I16: helper = &MC_(helperc_LOADV16le); 3857 hname = "MC_(helperc_LOADV16le)"; 3858 break; 3859 case Ity_I8: helper = &MC_(helperc_LOADV8); 3860 hname = "MC_(helperc_LOADV8)"; 3861 break; 3862 default: ppIRType(ty); 3863 VG_(tool_panic)("memcheck:do_shadow_Load(LE)"); 3864 } 3865 } else { 3866 switch (ty) { 3867 case Ity_I64: helper = &MC_(helperc_LOADV64be); 3868 hname = "MC_(helperc_LOADV64be)"; 3869 break; 3870 case Ity_I32: helper = &MC_(helperc_LOADV32be); 3871 hname = "MC_(helperc_LOADV32be)"; 3872 break; 3873 case Ity_I16: helper = &MC_(helperc_LOADV16be); 3874 hname = "MC_(helperc_LOADV16be)"; 3875 break; 3876 case Ity_I8: helper = &MC_(helperc_LOADV8); 3877 hname = "MC_(helperc_LOADV8)"; 3878 break; 3879 default: ppIRType(ty); 3880 VG_(tool_panic)("memcheck:do_shadow_Load(BE)"); 3881 } 3882 } 3883 3884 /* Generate the actual address into addrAct. */ 3885 if (bias == 0) { 3886 addrAct = addr; 3887 } else { 3888 IROp mkAdd; 3889 IRAtom* eBias; 3890 IRType tyAddr = mce->hWordTy; 3891 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3892 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3893 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3894 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 3895 } 3896 3897 /* We need to have a place to park the V bits we're just about to 3898 read. 
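      For example, for a 32-bit little-endian load the code below boils
      down to a single dirty call, roughly

         datavbits = DIRTY-call MC_(helperc_LOADV32le)(addrAct)

      and mkexpr(datavbits) then stands for the load's V bits in the
      rest of the shadow computation.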
*/ 3899 datavbits = newTemp(mce, ty, VSh); 3900 di = unsafeIRDirty_1_N( datavbits, 3901 1/*regparms*/, 3902 hname, VG_(fnptr_to_fnentry)( helper ), 3903 mkIRExprVec_1( addrAct )); 3904 setHelperAnns( mce, di ); 3905 stmt( 'V', mce, IRStmt_Dirty(di) ); 3906 3907 return mkexpr(datavbits); 3908} 3909 3910 3911static 3912IRAtom* expr2vbits_Load ( MCEnv* mce, 3913 IREndness end, IRType ty, 3914 IRAtom* addr, UInt bias ) 3915{ 3916 tl_assert(end == Iend_LE || end == Iend_BE); 3917 switch (shadowTypeV(ty)) { 3918 case Ity_I8: 3919 case Ity_I16: 3920 case Ity_I32: 3921 case Ity_I64: 3922 return expr2vbits_Load_WRK(mce, end, ty, addr, bias); 3923 case Ity_V128: { 3924 IRAtom *v64hi, *v64lo; 3925 if (end == Iend_LE) { 3926 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 3927 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3928 } else { 3929 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 3930 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3931 } 3932 return assignNew( 'V', mce, 3933 Ity_V128, 3934 binop(Iop_64HLtoV128, v64hi, v64lo)); 3935 } 3936 case Ity_V256: { 3937 /* V256-bit case -- phrased in terms of 64 bit units (Qs), 3938 with Q3 being the most significant lane. */ 3939 if (end == Iend_BE) goto unhandled; 3940 IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 3941 IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3942 IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16); 3943 IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24); 3944 return assignNew( 'V', mce, 3945 Ity_V256, 3946 IRExpr_Qop(Iop_64x4toV256, 3947 v64Q3, v64Q2, v64Q1, v64Q0)); 3948 } 3949 unhandled: 3950 default: 3951 VG_(tool_panic)("expr2vbits_Load"); 3952 } 3953} 3954 3955 3956/* If there is no guard expression or the guard is always TRUE this function 3957 behaves like expr2vbits_Load. If the guard is not true at runtime, an 3958 all-bits-defined bit pattern will be returned. 3959 It is assumed that definedness of GUARD has already been checked at the call 3960 site. */ 3961static 3962IRAtom* expr2vbits_guarded_Load ( MCEnv* mce, 3963 IREndness end, IRType ty, 3964 IRAtom* addr, UInt bias, IRAtom *guard ) 3965{ 3966 if (guard) { 3967 IRAtom *cond, *iffalse, *iftrue; 3968 3969 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard)); 3970 iftrue = assignNew('V', mce, ty, 3971 expr2vbits_Load(mce, end, ty, addr, bias)); 3972 iffalse = assignNew('V', mce, ty, definedOfType(ty)); 3973 3974 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue)); 3975 } 3976 3977 /* No guard expression or unconditional load */ 3978 return expr2vbits_Load(mce, end, ty, addr, bias); 3979} 3980 3981 3982static 3983IRAtom* expr2vbits_Mux0X ( MCEnv* mce, 3984 IRAtom* cond, IRAtom* expr0, IRAtom* exprX ) 3985{ 3986 IRAtom *vbitsC, *vbits0, *vbitsX; 3987 IRType ty; 3988 /* Given Mux0X(cond,expr0,exprX), generate 3989 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#) 3990 That is, steer the V bits like the originals, but trash the 3991 result if the steering value is undefined. This gives 3992 lazy propagation. 
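      Concretely: if cond# says the condition is fully defined,
      PCast(cond#) is all zeroes and the UifU leaves the selected arm's
      V bits untouched; if any bit of cond is undefined, PCast(cond#)
      is all ones and every bit of the result is flagged undefined.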
*/ 3993 tl_assert(isOriginalAtom(mce, cond)); 3994 tl_assert(isOriginalAtom(mce, expr0)); 3995 tl_assert(isOriginalAtom(mce, exprX)); 3996 3997 vbitsC = expr2vbits(mce, cond); 3998 vbits0 = expr2vbits(mce, expr0); 3999 vbitsX = expr2vbits(mce, exprX); 4000 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 4001 4002 return 4003 mkUifU(mce, ty, assignNew('V', mce, ty, 4004 IRExpr_Mux0X(cond, vbits0, vbitsX)), 4005 mkPCastTo(mce, ty, vbitsC) ); 4006} 4007 4008/* --------- This is the main expression-handling function. --------- */ 4009 4010static 4011IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 4012{ 4013 switch (e->tag) { 4014 4015 case Iex_Get: 4016 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 4017 4018 case Iex_GetI: 4019 return shadow_GETI( mce, e->Iex.GetI.descr, 4020 e->Iex.GetI.ix, e->Iex.GetI.bias ); 4021 4022 case Iex_RdTmp: 4023 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 4024 4025 case Iex_Const: 4026 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 4027 4028 case Iex_Qop: 4029 return expr2vbits_Qop( 4030 mce, 4031 e->Iex.Qop.details->op, 4032 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2, 4033 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4 4034 ); 4035 4036 case Iex_Triop: 4037 return expr2vbits_Triop( 4038 mce, 4039 e->Iex.Triop.details->op, 4040 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2, 4041 e->Iex.Triop.details->arg3 4042 ); 4043 4044 case Iex_Binop: 4045 return expr2vbits_Binop( 4046 mce, 4047 e->Iex.Binop.op, 4048 e->Iex.Binop.arg1, e->Iex.Binop.arg2 4049 ); 4050 4051 case Iex_Unop: 4052 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 4053 4054 case Iex_Load: 4055 return expr2vbits_Load( mce, e->Iex.Load.end, 4056 e->Iex.Load.ty, 4057 e->Iex.Load.addr, 0/*addr bias*/ ); 4058 4059 case Iex_CCall: 4060 return mkLazyN( mce, e->Iex.CCall.args, 4061 e->Iex.CCall.retty, 4062 e->Iex.CCall.cee ); 4063 4064 case Iex_Mux0X: 4065 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0, 4066 e->Iex.Mux0X.exprX); 4067 4068 default: 4069 VG_(printf)("\n"); 4070 ppIRExpr(e); 4071 VG_(printf)("\n"); 4072 VG_(tool_panic)("memcheck: expr2vbits"); 4073 } 4074} 4075 4076/*------------------------------------------------------------*/ 4077/*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 4078/*------------------------------------------------------------*/ 4079 4080/* Widen a value to the host word size. */ 4081 4082static 4083IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 4084{ 4085 IRType ty, tyH; 4086 4087 /* vatom is vbits-value and as such can only have a shadow type. 
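      The widening below is done by zero-extension, so the freshly
      created upper bits read as 'defined' (zero).  That is harmless,
      because the STOREV helpers this value is fed to only look at the
      low 8/16/32 bits of their vbits argument.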
*/ 4088 tl_assert(isShadowAtom(mce,vatom)); 4089 4090 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 4091 tyH = mce->hWordTy; 4092 4093 if (tyH == Ity_I32) { 4094 switch (ty) { 4095 case Ity_I32: 4096 return vatom; 4097 case Ity_I16: 4098 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 4099 case Ity_I8: 4100 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 4101 default: 4102 goto unhandled; 4103 } 4104 } else 4105 if (tyH == Ity_I64) { 4106 switch (ty) { 4107 case Ity_I32: 4108 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 4109 case Ity_I16: 4110 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4111 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 4112 case Ity_I8: 4113 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4114 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 4115 default: 4116 goto unhandled; 4117 } 4118 } else { 4119 goto unhandled; 4120 } 4121 unhandled: 4122 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 4123 VG_(tool_panic)("zwidenToHostWord"); 4124} 4125 4126 4127/* Generate a shadow store. addr is always the original address atom. 4128 You can pass in either originals or V-bits for the data atom, but 4129 obviously not both. guard :: Ity_I1 controls whether the store 4130 really happens; NULL means it unconditionally does. Note that 4131 guard itself is not checked for definedness; the caller of this 4132 function must do that if necessary. */ 4133 4134static 4135void do_shadow_Store ( MCEnv* mce, 4136 IREndness end, 4137 IRAtom* addr, UInt bias, 4138 IRAtom* data, IRAtom* vdata, 4139 IRAtom* guard ) 4140{ 4141 IROp mkAdd; 4142 IRType ty, tyAddr; 4143 void* helper = NULL; 4144 Char* hname = NULL; 4145 IRConst* c; 4146 4147 tyAddr = mce->hWordTy; 4148 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 4149 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 4150 tl_assert( end == Iend_LE || end == Iend_BE ); 4151 4152 if (data) { 4153 tl_assert(!vdata); 4154 tl_assert(isOriginalAtom(mce, data)); 4155 tl_assert(bias == 0); 4156 vdata = expr2vbits( mce, data ); 4157 } else { 4158 tl_assert(vdata); 4159 } 4160 4161 tl_assert(isOriginalAtom(mce,addr)); 4162 tl_assert(isShadowAtom(mce,vdata)); 4163 4164 if (guard) { 4165 tl_assert(isOriginalAtom(mce, guard)); 4166 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 4167 } 4168 4169 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 4170 4171 // If we're not doing undefined value checking, pretend that this value 4172 // is "all valid". That lets Vex's optimiser remove some of the V bit 4173 // shadow computation ops that precede it. 4174 if (MC_(clo_mc_level) == 1) { 4175 switch (ty) { 4176 case Ity_V256: // V256 weirdness -- used four times 4177 c = IRConst_V256(V_BITS32_DEFINED); break; 4178 case Ity_V128: // V128 weirdness -- used twice 4179 c = IRConst_V128(V_BITS16_DEFINED); break; 4180 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 4181 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 4182 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 4183 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 4184 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 4185 } 4186 vdata = IRExpr_Const( c ); 4187 } 4188 4189 /* First, emit a definedness test for the address. This also sets 4190 the address (shadow) to 'defined' following the test. */ 4191 complainIfUndefined( mce, addr, guard ); 4192 4193 /* Now decide which helper function to call to write the data V 4194 bits into shadow memory. 
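      There are only 8/16/32/64-bit STOREV helpers.  V128 and V256
      data is therefore handled by selecting the 64-bit helper here
      and then, below, issuing two (respectively four) 64-bit stores
      at successive 8-byte offsets.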
*/ 4195 if (end == Iend_LE) { 4196 switch (ty) { 4197 case Ity_V256: /* we'll use the helper four times */ 4198 case Ity_V128: /* we'll use the helper twice */ 4199 case Ity_I64: helper = &MC_(helperc_STOREV64le); 4200 hname = "MC_(helperc_STOREV64le)"; 4201 break; 4202 case Ity_I32: helper = &MC_(helperc_STOREV32le); 4203 hname = "MC_(helperc_STOREV32le)"; 4204 break; 4205 case Ity_I16: helper = &MC_(helperc_STOREV16le); 4206 hname = "MC_(helperc_STOREV16le)"; 4207 break; 4208 case Ity_I8: helper = &MC_(helperc_STOREV8); 4209 hname = "MC_(helperc_STOREV8)"; 4210 break; 4211 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 4212 } 4213 } else { 4214 switch (ty) { 4215 case Ity_V128: /* we'll use the helper twice */ 4216 case Ity_I64: helper = &MC_(helperc_STOREV64be); 4217 hname = "MC_(helperc_STOREV64be)"; 4218 break; 4219 case Ity_I32: helper = &MC_(helperc_STOREV32be); 4220 hname = "MC_(helperc_STOREV32be)"; 4221 break; 4222 case Ity_I16: helper = &MC_(helperc_STOREV16be); 4223 hname = "MC_(helperc_STOREV16be)"; 4224 break; 4225 case Ity_I8: helper = &MC_(helperc_STOREV8); 4226 hname = "MC_(helperc_STOREV8)"; 4227 break; 4228 /* Note, no V256 case here, because no big-endian target that 4229 we support, has 256 vectors. */ 4230 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 4231 } 4232 } 4233 4234 if (UNLIKELY(ty == Ity_V256)) { 4235 4236 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with 4237 Q3 being the most significant lane. */ 4238 /* These are the offsets of the Qs in memory. */ 4239 Int offQ0, offQ1, offQ2, offQ3; 4240 4241 /* Various bits for constructing the 4 lane helper calls */ 4242 IRDirty *diQ0, *diQ1, *diQ2, *diQ3; 4243 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3; 4244 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3; 4245 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3; 4246 4247 if (end == Iend_LE) { 4248 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24; 4249 } else { 4250 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24; 4251 } 4252 4253 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0); 4254 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) ); 4255 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata)); 4256 diQ0 = unsafeIRDirty_0_N( 4257 1/*regparms*/, 4258 hname, VG_(fnptr_to_fnentry)( helper ), 4259 mkIRExprVec_2( addrQ0, vdataQ0 ) 4260 ); 4261 4262 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1); 4263 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) ); 4264 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata)); 4265 diQ1 = unsafeIRDirty_0_N( 4266 1/*regparms*/, 4267 hname, VG_(fnptr_to_fnentry)( helper ), 4268 mkIRExprVec_2( addrQ1, vdataQ1 ) 4269 ); 4270 4271 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2); 4272 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) ); 4273 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata)); 4274 diQ2 = unsafeIRDirty_0_N( 4275 1/*regparms*/, 4276 hname, VG_(fnptr_to_fnentry)( helper ), 4277 mkIRExprVec_2( addrQ2, vdataQ2 ) 4278 ); 4279 4280 eBiasQ3 = tyAddr==Ity_I32 ? 
mkU32(bias+offQ3) : mkU64(bias+offQ3); 4281 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) ); 4282 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata)); 4283 diQ3 = unsafeIRDirty_0_N( 4284 1/*regparms*/, 4285 hname, VG_(fnptr_to_fnentry)( helper ), 4286 mkIRExprVec_2( addrQ3, vdataQ3 ) 4287 ); 4288 4289 if (guard) 4290 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard; 4291 4292 setHelperAnns( mce, diQ0 ); 4293 setHelperAnns( mce, diQ1 ); 4294 setHelperAnns( mce, diQ2 ); 4295 setHelperAnns( mce, diQ3 ); 4296 stmt( 'V', mce, IRStmt_Dirty(diQ0) ); 4297 stmt( 'V', mce, IRStmt_Dirty(diQ1) ); 4298 stmt( 'V', mce, IRStmt_Dirty(diQ2) ); 4299 stmt( 'V', mce, IRStmt_Dirty(diQ3) ); 4300 4301 } 4302 else if (UNLIKELY(ty == Ity_V128)) { 4303 4304 /* V128-bit case */ 4305 /* See comment in next clause re 64-bit regparms */ 4306 /* also, need to be careful about endianness */ 4307 4308 Int offLo64, offHi64; 4309 IRDirty *diLo64, *diHi64; 4310 IRAtom *addrLo64, *addrHi64; 4311 IRAtom *vdataLo64, *vdataHi64; 4312 IRAtom *eBiasLo64, *eBiasHi64; 4313 4314 if (end == Iend_LE) { 4315 offLo64 = 0; 4316 offHi64 = 8; 4317 } else { 4318 offLo64 = 8; 4319 offHi64 = 0; 4320 } 4321 4322 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 4323 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 4324 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 4325 diLo64 = unsafeIRDirty_0_N( 4326 1/*regparms*/, 4327 hname, VG_(fnptr_to_fnentry)( helper ), 4328 mkIRExprVec_2( addrLo64, vdataLo64 ) 4329 ); 4330 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 4331 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 4332 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 4333 diHi64 = unsafeIRDirty_0_N( 4334 1/*regparms*/, 4335 hname, VG_(fnptr_to_fnentry)( helper ), 4336 mkIRExprVec_2( addrHi64, vdataHi64 ) 4337 ); 4338 if (guard) diLo64->guard = guard; 4339 if (guard) diHi64->guard = guard; 4340 setHelperAnns( mce, diLo64 ); 4341 setHelperAnns( mce, diHi64 ); 4342 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 4343 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 4344 4345 } else { 4346 4347 IRDirty *di; 4348 IRAtom *addrAct; 4349 4350 /* 8/16/32/64-bit cases */ 4351 /* Generate the actual address into addrAct. */ 4352 if (bias == 0) { 4353 addrAct = addr; 4354 } else { 4355 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 4356 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 4357 } 4358 4359 if (ty == Ity_I64) { 4360 /* We can't do this with regparm 2 on 32-bit platforms, since 4361 the back ends aren't clever enough to handle 64-bit 4362 regparm args. Therefore be different. */ 4363 di = unsafeIRDirty_0_N( 4364 1/*regparms*/, 4365 hname, VG_(fnptr_to_fnentry)( helper ), 4366 mkIRExprVec_2( addrAct, vdata ) 4367 ); 4368 } else { 4369 di = unsafeIRDirty_0_N( 4370 2/*regparms*/, 4371 hname, VG_(fnptr_to_fnentry)( helper ), 4372 mkIRExprVec_2( addrAct, 4373 zwidenToHostWord( mce, vdata )) 4374 ); 4375 } 4376 if (guard) di->guard = guard; 4377 setHelperAnns( mce, di ); 4378 stmt( 'V', mce, IRStmt_Dirty(di) ); 4379 } 4380 4381} 4382 4383 4384/* Do lazy pessimistic propagation through a dirty helper call, by 4385 looking at the annotations on it. This is the most complex part of 4386 Memcheck. 
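   In outline: the V bits of everything the helper can observe -- its
   unmasked arguments, the guest state it reads and the memory it
   reads -- are each PCast-ed to an I32 and UifU-ed into a single
   summary value 'curr'.  'curr' is then PCast-ed back out to every
   output: the return temporary, the guest state written and the
   memory written.  Hence one undefined input bit pessimistically
   marks all outputs as undefined, which is crude but safe.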
*/ 4387 4388static IRType szToITy ( Int n ) 4389{ 4390 switch (n) { 4391 case 1: return Ity_I8; 4392 case 2: return Ity_I16; 4393 case 4: return Ity_I32; 4394 case 8: return Ity_I64; 4395 default: VG_(tool_panic)("szToITy(memcheck)"); 4396 } 4397} 4398 4399static 4400void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 4401{ 4402 Int i, k, n, toDo, gSz, gOff; 4403 IRAtom *src, *here, *curr; 4404 IRType tySrc, tyDst; 4405 IRTemp dst; 4406 IREndness end; 4407 4408 /* What's the native endianness? We need to know this. */ 4409# if defined(VG_BIGENDIAN) 4410 end = Iend_BE; 4411# elif defined(VG_LITTLEENDIAN) 4412 end = Iend_LE; 4413# else 4414# error "Unknown endianness" 4415# endif 4416 4417 /* First check the guard. */ 4418 complainIfUndefined(mce, d->guard, NULL); 4419 4420 /* Now round up all inputs and PCast over them. */ 4421 curr = definedOfType(Ity_I32); 4422 4423 /* Inputs: unmasked args 4424 Note: arguments are evaluated REGARDLESS of the guard expression */ 4425 for (i = 0; d->args[i]; i++) { 4426 if (d->cee->mcx_mask & (1<<i)) { 4427 /* ignore this arg */ 4428 } else { 4429 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) ); 4430 curr = mkUifU32(mce, here, curr); 4431 } 4432 } 4433 4434 /* Inputs: guest state that we read. */ 4435 for (i = 0; i < d->nFxState; i++) { 4436 tl_assert(d->fxState[i].fx != Ifx_None); 4437 if (d->fxState[i].fx == Ifx_Write) 4438 continue; 4439 4440 /* Enumerate the described state segments */ 4441 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 4442 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 4443 gSz = d->fxState[i].size; 4444 4445 /* Ignore any sections marked as 'always defined'. */ 4446 if (isAlwaysDefd(mce, gOff, gSz)) { 4447 if (0) 4448 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 4449 gOff, gSz); 4450 continue; 4451 } 4452 4453 /* This state element is read or modified. So we need to 4454 consider it. If larger than 8 bytes, deal with it in 4455 8-byte chunks. */ 4456 while (True) { 4457 tl_assert(gSz >= 0); 4458 if (gSz == 0) break; 4459 n = gSz <= 8 ? gSz : 8; 4460 /* update 'curr' with UifU of the state slice 4461 gOff .. gOff+n-1 */ 4462 tySrc = szToITy( n ); 4463 4464 /* Observe the guard expression. If it is false use an 4465 all-bits-defined bit pattern */ 4466 IRAtom *cond, *iffalse, *iftrue; 4467 4468 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard)); 4469 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc)); 4470 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc)); 4471 src = assignNew('V', mce, tySrc, 4472 IRExpr_Mux0X(cond, iffalse, iftrue)); 4473 4474 here = mkPCastTo( mce, Ity_I32, src ); 4475 curr = mkUifU32(mce, here, curr); 4476 gSz -= n; 4477 gOff += n; 4478 } 4479 } 4480 } 4481 4482 /* Inputs: memory. First set up some info needed regardless of 4483 whether we're doing reads or writes. */ 4484 4485 if (d->mFx != Ifx_None) { 4486 /* Because we may do multiple shadow loads/stores from the same 4487 base address, it's best to do a single test of its 4488 definedness right now. Post-instrumentation optimisation 4489 should remove all but this test. 
*/ 4490 IRType tyAddr; 4491 tl_assert(d->mAddr); 4492 complainIfUndefined(mce, d->mAddr, d->guard); 4493 4494 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 4495 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 4496 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 4497 } 4498 4499 /* Deal with memory inputs (reads or modifies) */ 4500 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 4501 toDo = d->mSize; 4502 /* chew off 32-bit chunks. We don't care about the endianness 4503 since it's all going to be condensed down to a single bit, 4504 but nevertheless choose an endianness which is hopefully 4505 native to the platform. */ 4506 while (toDo >= 4) { 4507 here = mkPCastTo( 4508 mce, Ity_I32, 4509 expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr, 4510 d->mSize - toDo, d->guard ) 4511 ); 4512 curr = mkUifU32(mce, here, curr); 4513 toDo -= 4; 4514 } 4515 /* chew off 16-bit chunks */ 4516 while (toDo >= 2) { 4517 here = mkPCastTo( 4518 mce, Ity_I32, 4519 expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr, 4520 d->mSize - toDo, d->guard ) 4521 ); 4522 curr = mkUifU32(mce, here, curr); 4523 toDo -= 2; 4524 } 4525 /* chew off the remaining 8-bit chunk, if any */ 4526 if (toDo == 1) { 4527 here = mkPCastTo( 4528 mce, Ity_I32, 4529 expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr, 4530 d->mSize - toDo, d->guard ) 4531 ); 4532 curr = mkUifU32(mce, here, curr); 4533 toDo -= 1; 4534 } 4535 tl_assert(toDo == 0); 4536 } 4537 4538 /* Whew! So curr is a 32-bit V-value summarising pessimistically 4539 all the inputs to the helper. Now we need to re-distribute the 4540 results to all destinations. */ 4541 4542 /* Outputs: the destination temporary, if there is one. */ 4543 if (d->tmp != IRTemp_INVALID) { 4544 dst = findShadowTmpV(mce, d->tmp); 4545 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 4546 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 4547 } 4548 4549 /* Outputs: guest state that we write or modify. */ 4550 for (i = 0; i < d->nFxState; i++) { 4551 tl_assert(d->fxState[i].fx != Ifx_None); 4552 if (d->fxState[i].fx == Ifx_Read) 4553 continue; 4554 4555 /* Enumerate the described state segments */ 4556 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 4557 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 4558 gSz = d->fxState[i].size; 4559 4560 /* Ignore any sections marked as 'always defined'. */ 4561 if (isAlwaysDefd(mce, gOff, gSz)) 4562 continue; 4563 4564 /* This state element is written or modified. So we need to 4565 consider it. If larger than 8 bytes, deal with it in 4566 8-byte chunks. */ 4567 while (True) { 4568 tl_assert(gSz >= 0); 4569 if (gSz == 0) break; 4570 n = gSz <= 8 ? gSz : 8; 4571 /* Write suitably-casted 'curr' to the state slice 4572 gOff .. gOff+n-1 */ 4573 tyDst = szToITy( n ); 4574 do_shadow_PUT( mce, gOff, 4575 NULL, /* original atom */ 4576 mkPCastTo( mce, tyDst, curr ), d->guard ); 4577 gSz -= n; 4578 gOff += n; 4579 } 4580 } 4581 } 4582 4583 /* Outputs: memory that we write or modify. Same comments about 4584 endianness as above apply. 
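      As with the memory inputs, the region is covered in 4-, 2- and
      finally 1-byte chunks, each chunk receiving the summary value
      'curr' PCast-ed to the chunk's width.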
*/ 4585 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 4586 toDo = d->mSize; 4587 /* chew off 32-bit chunks */ 4588 while (toDo >= 4) { 4589 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4590 NULL, /* original data */ 4591 mkPCastTo( mce, Ity_I32, curr ), 4592 d->guard ); 4593 toDo -= 4; 4594 } 4595 /* chew off 16-bit chunks */ 4596 while (toDo >= 2) { 4597 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4598 NULL, /* original data */ 4599 mkPCastTo( mce, Ity_I16, curr ), 4600 d->guard ); 4601 toDo -= 2; 4602 } 4603 /* chew off the remaining 8-bit chunk, if any */ 4604 if (toDo == 1) { 4605 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4606 NULL, /* original data */ 4607 mkPCastTo( mce, Ity_I8, curr ), 4608 d->guard ); 4609 toDo -= 1; 4610 } 4611 tl_assert(toDo == 0); 4612 } 4613 4614} 4615 4616 4617/* We have an ABI hint telling us that [base .. base+len-1] is to 4618 become undefined ("writable"). Generate code to call a helper to 4619 notify the A/V bit machinery of this fact. 4620 4621 We call 4622 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 4623 Addr nia ); 4624*/ 4625static 4626void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 4627{ 4628 IRDirty* di; 4629 /* Minor optimisation: if not doing origin tracking, ignore the 4630 supplied nia and pass zero instead. This is on the basis that 4631 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can 4632 almost always generate a shorter instruction to put zero into a 4633 register than any other value. */ 4634 if (MC_(clo_mc_level) < 3) 4635 nia = mkIRExpr_HWord(0); 4636 4637 di = unsafeIRDirty_0_N( 4638 0/*regparms*/, 4639 "MC_(helperc_MAKE_STACK_UNINIT)", 4640 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ), 4641 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 4642 ); 4643 stmt( 'V', mce, IRStmt_Dirty(di) ); 4644} 4645 4646 4647/* ------ Dealing with IRCAS (big and complex) ------ */ 4648 4649/* FWDS */ 4650static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 4651 IRAtom* baseaddr, Int offset ); 4652static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 4653static void gen_store_b ( MCEnv* mce, Int szB, 4654 IRAtom* baseaddr, Int offset, IRAtom* dataB, 4655 IRAtom* guard ); 4656 4657static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 4658static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 4659 4660 4661/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 4662 IRExpr.Consts, else this asserts. If they are both Consts, it 4663 doesn't do anything. So that just leaves the RdTmp case. 4664 4665 In which case: this assigns the shadow value SHADOW to the IR 4666 shadow temporary associated with ORIG. That is, ORIG, being an 4667 original temporary, will have a shadow temporary associated with 4668 it. However, in the case envisaged here, there will so far have 4669 been no IR emitted to actually write a shadow value into that 4670 temporary. What this routine does is to (emit IR to) copy the 4671 value in SHADOW into said temporary, so that after this call, 4672 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 4673 value in SHADOW. 4674 4675 Point is to allow callers to compute "by hand" a shadow value for 4676 ORIG, and force it to be associated with ORIG. 4677 4678 How do we know that that shadow associated with ORIG has not so far 4679 been assigned to? Well, we don't per se know that, but supposing 4680 it had. 
Then this routine would create a second assignment to it, 4681 and later the IR sanity checker would barf. But that never 4682 happens. QED. 4683*/ 4684static void bind_shadow_tmp_to_orig ( UChar how, 4685 MCEnv* mce, 4686 IRAtom* orig, IRAtom* shadow ) 4687{ 4688 tl_assert(isOriginalAtom(mce, orig)); 4689 tl_assert(isShadowAtom(mce, shadow)); 4690 switch (orig->tag) { 4691 case Iex_Const: 4692 tl_assert(shadow->tag == Iex_Const); 4693 break; 4694 case Iex_RdTmp: 4695 tl_assert(shadow->tag == Iex_RdTmp); 4696 if (how == 'V') { 4697 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 4698 shadow); 4699 } else { 4700 tl_assert(how == 'B'); 4701 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 4702 shadow); 4703 } 4704 break; 4705 default: 4706 tl_assert(0); 4707 } 4708} 4709 4710 4711static 4712void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 4713{ 4714 /* Scheme is (both single- and double- cases): 4715 4716 1. fetch data#,dataB (the proposed new value) 4717 4718 2. fetch expd#,expdB (what we expect to see at the address) 4719 4720 3. check definedness of address 4721 4722 4. load old#,oldB from shadow memory; this also checks 4723 addressibility of the address 4724 4725 5. the CAS itself 4726 4727 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below. 4728 4729 7. if "expected == old" (as computed by (6)) 4730 store data#,dataB to shadow memory 4731 4732 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores 4733 'data' but 7 stores 'data#'. Hence it is possible for the 4734 shadow data to be incorrectly checked and/or updated: 4735 4736 * 7 is at least gated correctly, since the 'expected == old' 4737 condition is derived from outputs of 5. However, the shadow 4738 write could happen too late: imagine after 5 we are 4739 descheduled, a different thread runs, writes a different 4740 (shadow) value at the address, and then we resume, hence 4741 overwriting the shadow value written by the other thread. 4742 4743 Because the original memory access is atomic, there's no way to 4744 make both the original and shadow accesses into a single atomic 4745 thing, hence this is unavoidable. 4746 4747 At least as Valgrind stands, I don't think it's a problem, since 4748 we're single threaded *and* we guarantee that there are no 4749 context switches during the execution of any specific superblock 4750 -- context switches can only happen at superblock boundaries. 4751 4752 If Valgrind ever becomes MT in the future, then it might be more 4753 of a problem. A possible kludge would be to artificially 4754 associate with the location, a lock, which we must acquire and 4755 release around the transaction as a whole. Hmm, that probably 4756 wouldn't work properly since it only guards us against other 4757 threads doing CASs on the same location, not against other 4758 threads doing normal reads and writes. 4759 4760 ------------------------------------------------------------ 4761 4762 COMMENT_ON_CasCmpEQ: 4763 4764 Note two things. Firstly, in the sequence above, we compute 4765 "expected == old", but we don't check definedness of it. Why 4766 not? Also, the x86 and amd64 front ends use 4767 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent 4768 determination (expected == old ?) for themselves, and we also 4769 don't check definedness for those primops; we just say that the 4770 result is defined. Why? Details follow. 
4771 4772 x86/amd64 contains various forms of locked insns: 4773 * lock prefix before all basic arithmetic insn; 4774 eg lock xorl %reg1,(%reg2) 4775 * atomic exchange reg-mem 4776 * compare-and-swaps 4777 4778 Rather than attempt to represent them all, which would be a 4779 royal PITA, I used a result from Maurice Herlihy 4780 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 4781 demonstrates that compare-and-swap is a primitive more general 4782 than the other two, and so can be used to represent all of them. 4783 So the translation scheme for (eg) lock incl (%reg) is as 4784 follows: 4785 4786 again: 4787 old = * %reg 4788 new = old + 1 4789 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 4790 4791 The "atomically" is the CAS bit. The scheme is always the same: 4792 get old value from memory, compute new value, atomically stuff 4793 new value back in memory iff the old value has not changed (iow, 4794 no other thread modified it in the meantime). If it has changed 4795 then we've been out-raced and we have to start over. 4796 4797 Now that's all very neat, but it has the bad side effect of 4798 introducing an explicit equality test into the translation. 4799 Consider the behaviour of said code on a memory location which 4800 is uninitialised. We will wind up doing a comparison on 4801 uninitialised data, and mc duly complains. 4802 4803 What's difficult about this is, the common case is that the 4804 location is uncontended, and so we're usually comparing the same 4805 value (* %reg) with itself. So we shouldn't complain even if it 4806 is undefined. But mc doesn't know that. 4807 4808 My solution is to mark the == in the IR specially, so as to tell 4809 mc that it almost certainly compares a value with itself, and we 4810 should just regard the result as always defined. Rather than 4811 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 4812 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 4813 4814 So there's always the question of, can this give a false 4815 negative? eg, imagine that initially, * %reg is defined; and we 4816 read that; but then in the gap between the read and the CAS, a 4817 different thread writes an undefined (and different) value at 4818 the location. Then the CAS in this thread will fail and we will 4819 go back to "again:", but without knowing that the trip back 4820 there was based on an undefined comparison. No matter; at least 4821 the other thread won the race and the location is correctly 4822 marked as undefined. What if it wrote an uninitialised version 4823 of the same value that was there originally, though? 4824 4825 etc etc. Seems like there's a small corner case in which we 4826 might lose the fact that something's defined -- we're out-raced 4827 in between the "old = * reg" and the "atomically {", _and_ the 4828 other thread is writing in an undefined version of what's 4829 already there. Well, that seems pretty unlikely. 4830 4831 --- 4832 4833 If we ever need to reinstate it .. code which generates a 4834 definedness test for "expected == old" was removed at r10432 of 4835 this file. 
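      Finally, to make the overall scheme concrete: for a single
      32-bit CAS at address A, the instrumentation generated by
      do_shadow_CAS_single below amounts, roughly, to

         vdata# = V bits of dataLo
         vexpd# = V bits of expdLo
         complain if A is undefined
         vold#  = 32-bit shadow load of A   (this also checks that A
                  is addressable; vold# becomes oldLo's V shadow)
         the CAS itself, copied to the output block
         eq     = CasCmpEQ32(expdLo, oldLo)
         32-bit shadow store of vdata# to A, guarded by eq

      plus the corresponding B (origin) operations when origin
      tracking is enabled.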
4836 */ 4837 if (cas->oldHi == IRTemp_INVALID) { 4838 do_shadow_CAS_single( mce, cas ); 4839 } else { 4840 do_shadow_CAS_double( mce, cas ); 4841 } 4842} 4843 4844 4845static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 4846{ 4847 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4848 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4849 IRAtom *voldLo = NULL, *boldLo = NULL; 4850 IRAtom *expd_eq_old = NULL; 4851 IROp opCasCmpEQ; 4852 Int elemSzB; 4853 IRType elemTy; 4854 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4855 4856 /* single CAS */ 4857 tl_assert(cas->oldHi == IRTemp_INVALID); 4858 tl_assert(cas->expdHi == NULL); 4859 tl_assert(cas->dataHi == NULL); 4860 4861 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4862 switch (elemTy) { 4863 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 4864 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 4865 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 4866 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 4867 default: tl_assert(0); /* IR defn disallows any other types */ 4868 } 4869 4870 /* 1. fetch data# (the proposed new value) */ 4871 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4872 vdataLo 4873 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4874 tl_assert(isShadowAtom(mce, vdataLo)); 4875 if (otrak) { 4876 bdataLo 4877 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4878 tl_assert(isShadowAtom(mce, bdataLo)); 4879 } 4880 4881 /* 2. fetch expected# (what we expect to see at the address) */ 4882 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4883 vexpdLo 4884 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4885 tl_assert(isShadowAtom(mce, vexpdLo)); 4886 if (otrak) { 4887 bexpdLo 4888 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4889 tl_assert(isShadowAtom(mce, bexpdLo)); 4890 } 4891 4892 /* 3. check definedness of address */ 4893 /* 4. fetch old# from shadow memory; this also checks 4894 addressibility of the address */ 4895 voldLo 4896 = assignNew( 4897 'V', mce, elemTy, 4898 expr2vbits_Load( 4899 mce, 4900 cas->end, elemTy, cas->addr, 0/*Addr bias*/ 4901 )); 4902 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4903 if (otrak) { 4904 boldLo 4905 = assignNew('B', mce, Ity_I32, 4906 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 4907 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 4908 } 4909 4910 /* 5. the CAS itself */ 4911 stmt( 'C', mce, IRStmt_CAS(cas) ); 4912 4913 /* 6. compute "expected == old" */ 4914 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 4915 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 4916 tree, but it's not copied from the input block. */ 4917 expd_eq_old 4918 = assignNew('C', mce, Ity_I1, 4919 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 4920 4921 /* 7. 
if "expected == old" 4922 store data# to shadow memory */ 4923 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 4924 NULL/*data*/, vdataLo/*vdata*/, 4925 expd_eq_old/*guard for store*/ ); 4926 if (otrak) { 4927 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 4928 bdataLo/*bdata*/, 4929 expd_eq_old/*guard for store*/ ); 4930 } 4931} 4932 4933 4934static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 4935{ 4936 IRAtom *vdataHi = NULL, *bdataHi = NULL; 4937 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4938 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 4939 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4940 IRAtom *voldHi = NULL, *boldHi = NULL; 4941 IRAtom *voldLo = NULL, *boldLo = NULL; 4942 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 4943 IRAtom *expd_eq_old = NULL, *zero = NULL; 4944 IROp opCasCmpEQ, opOr, opXor; 4945 Int elemSzB, memOffsLo, memOffsHi; 4946 IRType elemTy; 4947 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4948 4949 /* double CAS */ 4950 tl_assert(cas->oldHi != IRTemp_INVALID); 4951 tl_assert(cas->expdHi != NULL); 4952 tl_assert(cas->dataHi != NULL); 4953 4954 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4955 switch (elemTy) { 4956 case Ity_I8: 4957 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 4958 elemSzB = 1; zero = mkU8(0); 4959 break; 4960 case Ity_I16: 4961 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 4962 elemSzB = 2; zero = mkU16(0); 4963 break; 4964 case Ity_I32: 4965 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 4966 elemSzB = 4; zero = mkU32(0); 4967 break; 4968 case Ity_I64: 4969 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 4970 elemSzB = 8; zero = mkU64(0); 4971 break; 4972 default: 4973 tl_assert(0); /* IR defn disallows any other types */ 4974 } 4975 4976 /* 1. fetch data# (the proposed new value) */ 4977 tl_assert(isOriginalAtom(mce, cas->dataHi)); 4978 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4979 vdataHi 4980 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 4981 vdataLo 4982 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4983 tl_assert(isShadowAtom(mce, vdataHi)); 4984 tl_assert(isShadowAtom(mce, vdataLo)); 4985 if (otrak) { 4986 bdataHi 4987 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 4988 bdataLo 4989 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4990 tl_assert(isShadowAtom(mce, bdataHi)); 4991 tl_assert(isShadowAtom(mce, bdataLo)); 4992 } 4993 4994 /* 2. fetch expected# (what we expect to see at the address) */ 4995 tl_assert(isOriginalAtom(mce, cas->expdHi)); 4996 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4997 vexpdHi 4998 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 4999 vexpdLo 5000 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5001 tl_assert(isShadowAtom(mce, vexpdHi)); 5002 tl_assert(isShadowAtom(mce, vexpdLo)); 5003 if (otrak) { 5004 bexpdHi 5005 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 5006 bexpdLo 5007 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5008 tl_assert(isShadowAtom(mce, bexpdHi)); 5009 tl_assert(isShadowAtom(mce, bexpdLo)); 5010 } 5011 5012 /* 3. check definedness of address */ 5013 /* 4. 
fetch old# from shadow memory; this also checks 5014 addressibility of the address */ 5015 if (cas->end == Iend_LE) { 5016 memOffsLo = 0; 5017 memOffsHi = elemSzB; 5018 } else { 5019 tl_assert(cas->end == Iend_BE); 5020 memOffsLo = elemSzB; 5021 memOffsHi = 0; 5022 } 5023 voldHi 5024 = assignNew( 5025 'V', mce, elemTy, 5026 expr2vbits_Load( 5027 mce, 5028 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/ 5029 )); 5030 voldLo 5031 = assignNew( 5032 'V', mce, elemTy, 5033 expr2vbits_Load( 5034 mce, 5035 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/ 5036 )); 5037 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 5038 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 5039 if (otrak) { 5040 boldHi 5041 = assignNew('B', mce, Ity_I32, 5042 gen_load_b(mce, elemSzB, cas->addr, 5043 memOffsHi/*addr bias*/)); 5044 boldLo 5045 = assignNew('B', mce, Ity_I32, 5046 gen_load_b(mce, elemSzB, cas->addr, 5047 memOffsLo/*addr bias*/)); 5048 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 5049 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5050 } 5051 5052 /* 5. the CAS itself */ 5053 stmt( 'C', mce, IRStmt_CAS(cas) ); 5054 5055 /* 6. compute "expected == old" */ 5056 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5057 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5058 tree, but it's not copied from the input block. */ 5059 /* 5060 xHi = oldHi ^ expdHi; 5061 xLo = oldLo ^ expdLo; 5062 xHL = xHi | xLo; 5063 expd_eq_old = xHL == 0; 5064 */ 5065 xHi = assignNew('C', mce, elemTy, 5066 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 5067 xLo = assignNew('C', mce, elemTy, 5068 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 5069 xHL = assignNew('C', mce, elemTy, 5070 binop(opOr, xHi, xLo)); 5071 expd_eq_old 5072 = assignNew('C', mce, Ity_I1, 5073 binop(opCasCmpEQ, xHL, zero)); 5074 5075 /* 7. if "expected == old" 5076 store data# to shadow memory */ 5077 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 5078 NULL/*data*/, vdataHi/*vdata*/, 5079 expd_eq_old/*guard for store*/ ); 5080 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 5081 NULL/*data*/, vdataLo/*vdata*/, 5082 expd_eq_old/*guard for store*/ ); 5083 if (otrak) { 5084 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 5085 bdataHi/*bdata*/, 5086 expd_eq_old/*guard for store*/ ); 5087 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 5088 bdataLo/*bdata*/, 5089 expd_eq_old/*guard for store*/ ); 5090 } 5091} 5092 5093 5094/* ------ Dealing with LL/SC (not difficult) ------ */ 5095 5096static void do_shadow_LLSC ( MCEnv* mce, 5097 IREndness stEnd, 5098 IRTemp stResult, 5099 IRExpr* stAddr, 5100 IRExpr* stStoredata ) 5101{ 5102 /* In short: treat a load-linked like a normal load followed by an 5103 assignment of the loaded (shadow) data to the result temporary. 5104 Treat a store-conditional like a normal store, and mark the 5105 result temporary as defined. */ 5106 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 5107 IRTemp resTmp = findShadowTmpV(mce, stResult); 5108 5109 tl_assert(isIRAtom(stAddr)); 5110 if (stStoredata) 5111 tl_assert(isIRAtom(stStoredata)); 5112 5113 if (stStoredata == NULL) { 5114 /* Load Linked */ 5115 /* Just treat this as a normal load, followed by an assignment of 5116 the value to .result. 
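      That is, the V shadow of .result simply becomes the shadow load
      of the same address and width; the 'linked' aspect of the load
      has no bearing on definedness, so nothing further is needed for
      the load-linked half.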
*/ 5117 /* Stay sane */ 5118 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 5119 || resTy == Ity_I16 || resTy == Ity_I8); 5120 assign( 'V', mce, resTmp, 5121 expr2vbits_Load( 5122 mce, stEnd, resTy, stAddr, 0/*addr bias*/)); 5123 } else { 5124 /* Store Conditional */ 5125 /* Stay sane */ 5126 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 5127 stStoredata); 5128 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 5129 || dataTy == Ity_I16 || dataTy == Ity_I8); 5130 do_shadow_Store( mce, stEnd, 5131 stAddr, 0/* addr bias */, 5132 stStoredata, 5133 NULL /* shadow data */, 5134 NULL/*guard*/ ); 5135 /* This is a store conditional, so it writes to .result a value 5136 indicating whether or not the store succeeded. Just claim 5137 this value is always defined. In the PowerPC interpretation 5138 of store-conditional, definedness of the success indication 5139 depends on whether the address of the store matches the 5140 reservation address. But we can't tell that here (and 5141 anyway, we're not being PowerPC-specific). At least we are 5142 guaranteed that the definedness of the store address, and its 5143 addressibility, will be checked as per normal. So it seems 5144 pretty safe to just say that the success indication is always 5145 defined. 5146 5147 In schemeS, for origin tracking, we must correspondingly set 5148 a no-origin value for the origin shadow of .result. 5149 */ 5150 tl_assert(resTy == Ity_I1); 5151 assign( 'V', mce, resTmp, definedOfType(resTy) ); 5152 } 5153} 5154 5155 5156/*------------------------------------------------------------*/ 5157/*--- Memcheck main ---*/ 5158/*------------------------------------------------------------*/ 5159 5160static void schemeS ( MCEnv* mce, IRStmt* st ); 5161 5162static Bool isBogusAtom ( IRAtom* at ) 5163{ 5164 ULong n = 0; 5165 IRConst* con; 5166 tl_assert(isIRAtom(at)); 5167 if (at->tag == Iex_RdTmp) 5168 return False; 5169 tl_assert(at->tag == Iex_Const); 5170 con = at->Iex.Const.con; 5171 switch (con->tag) { 5172 case Ico_U1: return False; 5173 case Ico_U8: n = (ULong)con->Ico.U8; break; 5174 case Ico_U16: n = (ULong)con->Ico.U16; break; 5175 case Ico_U32: n = (ULong)con->Ico.U32; break; 5176 case Ico_U64: n = (ULong)con->Ico.U64; break; 5177 case Ico_F64: return False; 5178 case Ico_F32i: return False; 5179 case Ico_F64i: return False; 5180 case Ico_V128: return False; 5181 default: ppIRExpr(at); tl_assert(0); 5182 } 5183 /* VG_(printf)("%llx\n", n); */ 5184 return (/*32*/ n == 0xFEFEFEFFULL 5185 /*32*/ || n == 0x80808080ULL 5186 /*32*/ || n == 0x7F7F7F7FULL 5187 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 5188 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 5189 /*64*/ || n == 0x0000000000008080ULL 5190 /*64*/ || n == 0x8080808080808080ULL 5191 /*64*/ || n == 0x0101010101010101ULL 5192 ); 5193} 5194 5195static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 5196{ 5197 Int i; 5198 IRExpr* e; 5199 IRDirty* d; 5200 IRCAS* cas; 5201 switch (st->tag) { 5202 case Ist_WrTmp: 5203 e = st->Ist.WrTmp.data; 5204 switch (e->tag) { 5205 case Iex_Get: 5206 case Iex_RdTmp: 5207 return False; 5208 case Iex_Const: 5209 return isBogusAtom(e); 5210 case Iex_Unop: 5211 return isBogusAtom(e->Iex.Unop.arg); 5212 case Iex_GetI: 5213 return isBogusAtom(e->Iex.GetI.ix); 5214 case Iex_Binop: 5215 return isBogusAtom(e->Iex.Binop.arg1) 5216 || isBogusAtom(e->Iex.Binop.arg2); 5217 case Iex_Triop: 5218 return isBogusAtom(e->Iex.Triop.details->arg1) 5219 || isBogusAtom(e->Iex.Triop.details->arg2) 5220 || isBogusAtom(e->Iex.Triop.details->arg3); 5221 case Iex_Qop: 5222 return 
isBogusAtom(e->Iex.Qop.details->arg1) 5223 || isBogusAtom(e->Iex.Qop.details->arg2) 5224 || isBogusAtom(e->Iex.Qop.details->arg3) 5225 || isBogusAtom(e->Iex.Qop.details->arg4); 5226 case Iex_Mux0X: 5227 return isBogusAtom(e->Iex.Mux0X.cond) 5228 || isBogusAtom(e->Iex.Mux0X.expr0) 5229 || isBogusAtom(e->Iex.Mux0X.exprX); 5230 case Iex_Load: 5231 return isBogusAtom(e->Iex.Load.addr); 5232 case Iex_CCall: 5233 for (i = 0; e->Iex.CCall.args[i]; i++) 5234 if (isBogusAtom(e->Iex.CCall.args[i])) 5235 return True; 5236 return False; 5237 default: 5238 goto unhandled; 5239 } 5240 case Ist_Dirty: 5241 d = st->Ist.Dirty.details; 5242 for (i = 0; d->args[i]; i++) 5243 if (isBogusAtom(d->args[i])) 5244 return True; 5245 if (d->guard && isBogusAtom(d->guard)) 5246 return True; 5247 if (d->mAddr && isBogusAtom(d->mAddr)) 5248 return True; 5249 return False; 5250 case Ist_Put: 5251 return isBogusAtom(st->Ist.Put.data); 5252 case Ist_PutI: 5253 return isBogusAtom(st->Ist.PutI.details->ix) 5254 || isBogusAtom(st->Ist.PutI.details->data); 5255 case Ist_Store: 5256 return isBogusAtom(st->Ist.Store.addr) 5257 || isBogusAtom(st->Ist.Store.data); 5258 case Ist_Exit: 5259 return isBogusAtom(st->Ist.Exit.guard); 5260 case Ist_AbiHint: 5261 return isBogusAtom(st->Ist.AbiHint.base) 5262 || isBogusAtom(st->Ist.AbiHint.nia); 5263 case Ist_NoOp: 5264 case Ist_IMark: 5265 case Ist_MBE: 5266 return False; 5267 case Ist_CAS: 5268 cas = st->Ist.CAS.details; 5269 return isBogusAtom(cas->addr) 5270 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 5271 || isBogusAtom(cas->expdLo) 5272 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 5273 || isBogusAtom(cas->dataLo); 5274 case Ist_LLSC: 5275 return isBogusAtom(st->Ist.LLSC.addr) 5276 || (st->Ist.LLSC.storedata 5277 ? isBogusAtom(st->Ist.LLSC.storedata) 5278 : False); 5279 default: 5280 unhandled: 5281 ppIRStmt(st); 5282 VG_(tool_panic)("hasBogusLiterals"); 5283 } 5284} 5285 5286 5287IRSB* MC_(instrument) ( VgCallbackClosure* closure, 5288 IRSB* sb_in, 5289 VexGuestLayout* layout, 5290 VexGuestExtents* vge, 5291 IRType gWordTy, IRType hWordTy ) 5292{ 5293 Bool verboze = 0||False; 5294 Bool bogus; 5295 Int i, j, first_stmt; 5296 IRStmt* st; 5297 MCEnv mce; 5298 IRSB* sb_out; 5299 5300 if (gWordTy != hWordTy) { 5301 /* We don't currently support this case. */ 5302 VG_(tool_panic)("host/guest word size mismatch"); 5303 } 5304 5305 /* Check we're not completely nuts */ 5306 tl_assert(sizeof(UWord) == sizeof(void*)); 5307 tl_assert(sizeof(Word) == sizeof(void*)); 5308 tl_assert(sizeof(Addr) == sizeof(void*)); 5309 tl_assert(sizeof(ULong) == 8); 5310 tl_assert(sizeof(Long) == 8); 5311 tl_assert(sizeof(Addr64) == 8); 5312 tl_assert(sizeof(UInt) == 4); 5313 tl_assert(sizeof(Int) == 4); 5314 5315 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 5316 5317 /* Set up SB */ 5318 sb_out = deepCopyIRSBExceptStmts(sb_in); 5319 5320 /* Set up the running environment. Both .sb and .tmpMap are 5321 modified as we go along. Note that tmps are added to both 5322 .sb->tyenv and .tmpMap together, so the valid index-set for 5323 those two arrays should always be identical. */ 5324 VG_(memset)(&mce, 0, sizeof(mce)); 5325 mce.sb = sb_out; 5326 mce.trace = verboze; 5327 mce.layout = layout; 5328 mce.hWordTy = hWordTy; 5329 mce.bogusLiterals = False; 5330 5331 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on 5332 Darwin. 10.7 is mostly built with LLVM, which uses these for 5333 bitfield inserts, and we get a lot of false errors if the cheap 5334 interpretation is used, alas. 
Could solve this much better if 5335 we knew which of such adds came from x86/amd64 LEA instructions, 5336 since these are the only ones really needing the expensive 5337 interpretation, but that would require some way to tag them in 5338 the _toIR.c front ends, which is a lot of faffing around. So 5339 for now just use the slow and blunt-instrument solution. */ 5340 mce.useLLVMworkarounds = False; 5341# if defined(VGO_darwin) 5342 mce.useLLVMworkarounds = True; 5343# endif 5344 5345 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 5346 sizeof(TempMapEnt)); 5347 for (i = 0; i < sb_in->tyenv->types_used; i++) { 5348 TempMapEnt ent; 5349 ent.kind = Orig; 5350 ent.shadowV = IRTemp_INVALID; 5351 ent.shadowB = IRTemp_INVALID; 5352 VG_(addToXA)( mce.tmpMap, &ent ); 5353 } 5354 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 5355 5356 /* Make a preliminary inspection of the statements, to see if there 5357 are any dodgy-looking literals. If there are, we generate 5358 extra-detailed (hence extra-expensive) instrumentation in 5359 places. Scan the whole bb even if dodgyness is found earlier, 5360 so that the flatness assertion is applied to all stmts. */ 5361 5362 bogus = False; 5363 5364 for (i = 0; i < sb_in->stmts_used; i++) { 5365 5366 st = sb_in->stmts[i]; 5367 tl_assert(st); 5368 tl_assert(isFlatIRStmt(st)); 5369 5370 if (!bogus) { 5371 bogus = checkForBogusLiterals(st); 5372 if (0 && bogus) { 5373 VG_(printf)("bogus: "); 5374 ppIRStmt(st); 5375 VG_(printf)("\n"); 5376 } 5377 } 5378 5379 } 5380 5381 mce.bogusLiterals = bogus; 5382 5383 /* Copy verbatim any IR preamble preceding the first IMark */ 5384 5385 tl_assert(mce.sb == sb_out); 5386 tl_assert(mce.sb != sb_in); 5387 5388 i = 0; 5389 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 5390 5391 st = sb_in->stmts[i]; 5392 tl_assert(st); 5393 tl_assert(isFlatIRStmt(st)); 5394 5395 stmt( 'C', &mce, sb_in->stmts[i] ); 5396 i++; 5397 } 5398 5399 /* Nasty problem. IR optimisation of the pre-instrumented IR may 5400 cause the IR following the preamble to contain references to IR 5401 temporaries defined in the preamble. Because the preamble isn't 5402 instrumented, these temporaries don't have any shadows. 5403 Nevertheless uses of them following the preamble will cause 5404 memcheck to generate references to their shadows. End effect is 5405 to cause IR sanity check failures, due to references to 5406 non-existent shadows. This is only evident for the complex 5407 preambles used for function wrapping on TOC-afflicted platforms 5408 (ppc64-linux). 5409 5410 The following loop therefore scans the preamble looking for 5411 assignments to temporaries. For each one found it creates an 5412 assignment to the corresponding (V) shadow temp, marking it as 5413 'defined'. This is the same resulting IR as if the main 5414 instrumentation loop before had been applied to the statement 5415 'tmp = CONSTANT'. 5416 5417 Similarly, if origin tracking is enabled, we must generate an 5418 assignment for the corresponding origin (B) shadow, claiming 5419 no-origin, as appropriate for a defined value. 5420 */ 5421 for (j = 0; j < i; j++) { 5422 if (sb_in->stmts[j]->tag == Ist_WrTmp) { 5423 /* findShadowTmpV checks its arg is an original tmp; 5424 no need to assert that here. 
*/ 5425 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 5426 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 5427 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 5428 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 5429 if (MC_(clo_mc_level) == 3) { 5430 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 5431 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 5432 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 5433 } 5434 if (0) { 5435 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 5436 ppIRType( ty_v ); 5437 VG_(printf)("\n"); 5438 } 5439 } 5440 } 5441 5442 /* Iterate over the remaining stmts to generate instrumentation. */ 5443 5444 tl_assert(sb_in->stmts_used > 0); 5445 tl_assert(i >= 0); 5446 tl_assert(i < sb_in->stmts_used); 5447 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 5448 5449 for (/* use current i*/; i < sb_in->stmts_used; i++) { 5450 5451 st = sb_in->stmts[i]; 5452 first_stmt = sb_out->stmts_used; 5453 5454 if (verboze) { 5455 VG_(printf)("\n"); 5456 ppIRStmt(st); 5457 VG_(printf)("\n"); 5458 } 5459 5460 if (MC_(clo_mc_level) == 3) { 5461 /* See comments on case Ist_CAS below. */ 5462 if (st->tag != Ist_CAS) 5463 schemeS( &mce, st ); 5464 } 5465 5466 /* Generate instrumentation code for each stmt ... */ 5467 5468 switch (st->tag) { 5469 5470 case Ist_WrTmp: 5471 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 5472 expr2vbits( &mce, st->Ist.WrTmp.data) ); 5473 break; 5474 5475 case Ist_Put: 5476 do_shadow_PUT( &mce, 5477 st->Ist.Put.offset, 5478 st->Ist.Put.data, 5479 NULL /* shadow atom */, NULL /* guard */ ); 5480 break; 5481 5482 case Ist_PutI: 5483 do_shadow_PUTI( &mce, st->Ist.PutI.details); 5484 break; 5485 5486 case Ist_Store: 5487 do_shadow_Store( &mce, st->Ist.Store.end, 5488 st->Ist.Store.addr, 0/* addr bias */, 5489 st->Ist.Store.data, 5490 NULL /* shadow data */, 5491 NULL/*guard*/ ); 5492 break; 5493 5494 case Ist_Exit: 5495 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); 5496 break; 5497 5498 case Ist_IMark: 5499 break; 5500 5501 case Ist_NoOp: 5502 case Ist_MBE: 5503 break; 5504 5505 case Ist_Dirty: 5506 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 5507 break; 5508 5509 case Ist_AbiHint: 5510 do_AbiHint( &mce, st->Ist.AbiHint.base, 5511 st->Ist.AbiHint.len, 5512 st->Ist.AbiHint.nia ); 5513 break; 5514 5515 case Ist_CAS: 5516 do_shadow_CAS( &mce, st->Ist.CAS.details ); 5517 /* Note, do_shadow_CAS copies the CAS itself to the output 5518 block, because it needs to add instrumentation both 5519 before and after it. Hence skip the copy below. Also 5520 skip the origin-tracking stuff (call to schemeS) above, 5521 since that's all tangled up with it too; do_shadow_CAS 5522 does it all. */ 5523 break; 5524 5525 case Ist_LLSC: 5526 do_shadow_LLSC( &mce, 5527 st->Ist.LLSC.end, 5528 st->Ist.LLSC.result, 5529 st->Ist.LLSC.addr, 5530 st->Ist.LLSC.storedata ); 5531 break; 5532 5533 default: 5534 VG_(printf)("\n"); 5535 ppIRStmt(st); 5536 VG_(printf)("\n"); 5537 VG_(tool_panic)("memcheck: unhandled IRStmt"); 5538 5539 } /* switch (st->tag) */ 5540 5541 if (0 && verboze) { 5542 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5543 VG_(printf)(" "); 5544 ppIRStmt(sb_out->stmts[j]); 5545 VG_(printf)("\n"); 5546 } 5547 VG_(printf)("\n"); 5548 } 5549 5550 /* ... and finally copy the stmt itself to the output. Except, 5551 skip the copy of IRCASs; see comments on case Ist_CAS 5552 above. */ 5553 if (st->tag != Ist_CAS) 5554 stmt('C', &mce, st); 5555 } 5556 5557 /* Now we need to complain if the jump target is undefined. 
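      Roughly speaking (an illustrative note; complainIfUndefined is
      defined earlier in this file): if the block ends in an indirect
      jump through a temp t9, the code emitted here tests the V bits of
      t9 and makes a guarded call to one of the
      MC_(helperc_value_checkN_fail_*) helpers enumerated below in
      is_helperc_value_checkN_fail, so an error is reported only if the
      target address is wholly or partially undefined.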
*/ 5558 first_stmt = sb_out->stmts_used; 5559 5560 if (verboze) { 5561 VG_(printf)("sb_in->next = "); 5562 ppIRExpr(sb_in->next); 5563 VG_(printf)("\n\n"); 5564 } 5565 5566 complainIfUndefined( &mce, sb_in->next, NULL ); 5567 5568 if (0 && verboze) { 5569 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5570 VG_(printf)(" "); 5571 ppIRStmt(sb_out->stmts[j]); 5572 VG_(printf)("\n"); 5573 } 5574 VG_(printf)("\n"); 5575 } 5576 5577 /* If this fails, there's been some serious snafu with tmp management, 5578 that should be investigated. */ 5579 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 5580 VG_(deleteXA)( mce.tmpMap ); 5581 5582 tl_assert(mce.sb == sb_out); 5583 return sb_out; 5584} 5585 5586/*------------------------------------------------------------*/ 5587/*--- Post-tree-build final tidying ---*/ 5588/*------------------------------------------------------------*/ 5589 5590/* This exploits the observation that Memcheck often produces 5591 repeated conditional calls of the form 5592 5593 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 5594 5595 with the same guard expression G guarding the same helper call. 5596 The second and subsequent calls are redundant. This usually 5597 results from instrumentation of guest code containing multiple 5598 memory references at different constant offsets from the same base 5599 register. After optimisation of the instrumentation, you get a 5600 test for the definedness of the base register for each memory 5601 reference, which is kinda pointless. MC_(final_tidy) therefore 5602 looks for such repeated calls and removes all but the first. */ 5603 5604/* A struct for recording which (helper, guard) pairs we have already 5605 seen. */ 5606typedef 5607 struct { void* entry; IRExpr* guard; } 5608 Pair; 5609 5610/* Return True if e1 and e2 definitely denote the same value (used to 5611 compare guards). Return False if unknown; False is the safe 5612 answer. Since guest registers and guest memory do not have the 5613 SSA property we must return False if any Gets or Loads appear in 5614 the expression. */ 5615 5616static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 5617{ 5618 if (e1->tag != e2->tag) 5619 return False; 5620 switch (e1->tag) { 5621 case Iex_Const: 5622 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 5623 case Iex_Binop: 5624 return e1->Iex.Binop.op == e2->Iex.Binop.op 5625 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 5626 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 5627 case Iex_Unop: 5628 return e1->Iex.Unop.op == e2->Iex.Unop.op 5629 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 5630 case Iex_RdTmp: 5631 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 5632 case Iex_Mux0X: 5633 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond ) 5634 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 ) 5635 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX ); 5636 case Iex_Qop: 5637 case Iex_Triop: 5638 case Iex_CCall: 5639 /* be lazy. Could define equality for these, but they never 5640 appear to be used. 
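      If that ever changes, a sketch for the Triop case, mirroring the
      Binop case above, might be

         return e1->Iex.Triop.details->op == e2->Iex.Triop.details->op
                && sameIRValue(e1->Iex.Triop.details->arg1,
                               e2->Iex.Triop.details->arg1)
                && sameIRValue(e1->Iex.Triop.details->arg2,
                               e2->Iex.Triop.details->arg2)
                && sameIRValue(e1->Iex.Triop.details->arg3,
                               e2->Iex.Triop.details->arg3);

      and analogously for Qop via e->Iex.Qop.details->arg1 .. arg4.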
*/ 5641 return False; 5642 case Iex_Get: 5643 case Iex_GetI: 5644 case Iex_Load: 5645 /* be conservative - these may not give the same value each 5646 time */ 5647 return False; 5648 case Iex_Binder: 5649 /* should never see this */ 5650 /* fallthrough */ 5651 default: 5652 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 5653 ppIRExpr(e1); 5654 VG_(tool_panic)("memcheck:sameIRValue"); 5655 return False; 5656 } 5657} 5658 5659/* See if 'pairs' already has an entry for (entry, guard). Return 5660 True if so. If not, add an entry. */ 5661 5662static 5663Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 5664{ 5665 Pair p; 5666 Pair* pp; 5667 Int i, n = VG_(sizeXA)( pairs ); 5668 for (i = 0; i < n; i++) { 5669 pp = VG_(indexXA)( pairs, i ); 5670 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 5671 return True; 5672 } 5673 p.guard = guard; 5674 p.entry = entry; 5675 VG_(addToXA)( pairs, &p ); 5676 return False; 5677} 5678 5679static Bool is_helperc_value_checkN_fail ( HChar* name ) 5680{ 5681 return 5682 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 5683 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 5684 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 5685 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 5686 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 5687 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 5688 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 5689 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 5690} 5691 5692IRSB* MC_(final_tidy) ( IRSB* sb_in ) 5693{ 5694 Int i; 5695 IRStmt* st; 5696 IRDirty* di; 5697 IRExpr* guard; 5698 IRCallee* cee; 5699 Bool alreadyPresent; 5700 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 5701 VG_(free), sizeof(Pair) ); 5702 /* Scan forwards through the statements. Each time a call to one 5703 of the relevant helpers is seen, check if we have made a 5704 previous call to the same helper using the same guard 5705 expression, and if so, delete the call. */ 5706 for (i = 0; i < sb_in->stmts_used; i++) { 5707 st = sb_in->stmts[i]; 5708 tl_assert(st); 5709 if (st->tag != Ist_Dirty) 5710 continue; 5711 di = st->Ist.Dirty.details; 5712 guard = di->guard; 5713 if (!guard) 5714 continue; 5715 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 5716 cee = di->cee; 5717 if (!is_helperc_value_checkN_fail( cee->name )) 5718 continue; 5719 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 5720 guard 'guard'. Check if we have already seen a call to this 5721 function with the same guard. If so, delete it. If not, 5722 add it to the set of calls we do know about. */ 5723 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 5724 if (alreadyPresent) { 5725 sb_in->stmts[i] = IRStmt_NoOp(); 5726 if (0) VG_(printf)("XX\n"); 5727 } 5728 } 5729 VG_(deleteXA)( pairs ); 5730 return sb_in; 5731} 5732 5733 5734/*------------------------------------------------------------*/ 5735/*--- Origin tracking stuff ---*/ 5736/*------------------------------------------------------------*/ 5737 5738/* Almost identical to findShadowTmpV. */ 5739static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 5740{ 5741 TempMapEnt* ent; 5742 /* VG_(indexXA) range-checks 'orig', hence no need to check 5743 here. 
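      The B shadow is created lazily: the first call for a given
      original temp allocates a fresh Ity_I32 temp via
      newTemp(mce, Ity_I32, BSh) and records it in that temp's
      TempMapEnt.shadowB field; subsequent calls just return the cached
      temp.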
*/ 5744 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5745 tl_assert(ent->kind == Orig); 5746 if (ent->shadowB == IRTemp_INVALID) { 5747 IRTemp tmpB 5748 = newTemp( mce, Ity_I32, BSh ); 5749 /* newTemp may cause mce->tmpMap to resize, hence previous results 5750 from VG_(indexXA) are invalid. */ 5751 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5752 tl_assert(ent->kind == Orig); 5753 tl_assert(ent->shadowB == IRTemp_INVALID); 5754 ent->shadowB = tmpB; 5755 } 5756 return ent->shadowB; 5757} 5758 5759static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 5760{ 5761 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 5762} 5763 5764static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5765 IRAtom* baseaddr, Int offset ) 5766{ 5767 void* hFun; 5768 HChar* hName; 5769 IRTemp bTmp; 5770 IRDirty* di; 5771 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5772 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5773 IRAtom* ea = baseaddr; 5774 if (offset != 0) { 5775 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5776 : mkU64( (Long)(Int)offset ); 5777 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5778 } 5779 bTmp = newTemp(mce, mce->hWordTy, BSh); 5780 5781 switch (szB) { 5782 case 1: hFun = (void*)&MC_(helperc_b_load1); 5783 hName = "MC_(helperc_b_load1)"; 5784 break; 5785 case 2: hFun = (void*)&MC_(helperc_b_load2); 5786 hName = "MC_(helperc_b_load2)"; 5787 break; 5788 case 4: hFun = (void*)&MC_(helperc_b_load4); 5789 hName = "MC_(helperc_b_load4)"; 5790 break; 5791 case 8: hFun = (void*)&MC_(helperc_b_load8); 5792 hName = "MC_(helperc_b_load8)"; 5793 break; 5794 case 16: hFun = (void*)&MC_(helperc_b_load16); 5795 hName = "MC_(helperc_b_load16)"; 5796 break; 5797 case 32: hFun = (void*)&MC_(helperc_b_load32); 5798 hName = "MC_(helperc_b_load32)"; 5799 break; 5800 default: 5801 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 5802 tl_assert(0); 5803 } 5804 di = unsafeIRDirty_1_N( 5805 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 5806 mkIRExprVec_1( ea ) 5807 ); 5808 /* no need to mess with any annotations. This call accesses 5809 neither guest state nor guest memory. */ 5810 stmt( 'B', mce, IRStmt_Dirty(di) ); 5811 if (mce->hWordTy == Ity_I64) { 5812 /* 64-bit host */ 5813 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 5814 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 5815 return mkexpr(bTmp32); 5816 } else { 5817 /* 32-bit host */ 5818 return mkexpr(bTmp); 5819 } 5820} 5821 5822static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr, 5823 Int offset, IRAtom* guard ) 5824{ 5825 if (guard) { 5826 IRAtom *cond, *iffalse, *iftrue; 5827 5828 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard)); 5829 iftrue = assignNew('B', mce, Ity_I32, 5830 gen_load_b(mce, szB, baseaddr, offset)); 5831 iffalse = mkU32(0); 5832 5833 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue)); 5834 } 5835 5836 return gen_load_b(mce, szB, baseaddr, offset); 5837} 5838 5839/* Generate a shadow store. guard :: Ity_I1 controls whether the 5840 store really happens; NULL means it unconditionally does. */ 5841static void gen_store_b ( MCEnv* mce, Int szB, 5842 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5843 IRAtom* guard ) 5844{ 5845 void* hFun; 5846 HChar* hName; 5847 IRDirty* di; 5848 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5849 IROp opAdd = aTy == Ity_I32 ? 
Iop_Add32 : Iop_Add64; 5850 IRAtom* ea = baseaddr; 5851 if (guard) { 5852 tl_assert(isOriginalAtom(mce, guard)); 5853 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5854 } 5855 if (offset != 0) { 5856 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5857 : mkU64( (Long)(Int)offset ); 5858 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5859 } 5860 if (mce->hWordTy == Ity_I64) 5861 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); 5862 5863 switch (szB) { 5864 case 1: hFun = (void*)&MC_(helperc_b_store1); 5865 hName = "MC_(helperc_b_store1)"; 5866 break; 5867 case 2: hFun = (void*)&MC_(helperc_b_store2); 5868 hName = "MC_(helperc_b_store2)"; 5869 break; 5870 case 4: hFun = (void*)&MC_(helperc_b_store4); 5871 hName = "MC_(helperc_b_store4)"; 5872 break; 5873 case 8: hFun = (void*)&MC_(helperc_b_store8); 5874 hName = "MC_(helperc_b_store8)"; 5875 break; 5876 case 16: hFun = (void*)&MC_(helperc_b_store16); 5877 hName = "MC_(helperc_b_store16)"; 5878 break; 5879 case 32: hFun = (void*)&MC_(helperc_b_store32); 5880 hName = "MC_(helperc_b_store32)"; 5881 break; 5882 default: 5883 tl_assert(0); 5884 } 5885 di = unsafeIRDirty_0_N( 2/*regparms*/, 5886 hName, VG_(fnptr_to_fnentry)( hFun ), 5887 mkIRExprVec_2( ea, dataB ) 5888 ); 5889 /* no need to mess with any annotations. This call accesses 5890 neither guest state nor guest memory. */ 5891 if (guard) di->guard = guard; 5892 stmt( 'B', mce, IRStmt_Dirty(di) ); 5893} 5894 5895static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { 5896 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5897 if (eTy == Ity_I64) 5898 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); 5899 if (eTy == Ity_I32) 5900 return e; 5901 tl_assert(0); 5902} 5903 5904static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { 5905 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5906 tl_assert(eTy == Ity_I32); 5907 if (dstTy == Ity_I64) 5908 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); 5909 tl_assert(0); 5910} 5911 5912 5913static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) 5914{ 5915 tl_assert(MC_(clo_mc_level) == 3); 5916 5917 switch (e->tag) { 5918 5919 case Iex_GetI: { 5920 IRRegArray* descr_b; 5921 IRAtom *t1, *t2, *t3, *t4; 5922 IRRegArray* descr = e->Iex.GetI.descr; 5923 IRType equivIntTy 5924 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 5925 /* If this array is unshadowable for whatever reason, use the 5926 usual approximation. */ 5927 if (equivIntTy == Ity_INVALID) 5928 return mkU32(0); 5929 tl_assert(sizeofIRType(equivIntTy) >= 4); 5930 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 5931 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 5932 equivIntTy, descr->nElems ); 5933 /* Do a shadow indexed get of the same size, giving t1. Take 5934 the bottom 32 bits of it, giving t2. Compute into t3 the 5935 origin for the index (almost certainly zero, but there's 5936 no harm in being completely general here, since iropt will 5937 remove any useless code), and fold it in, giving a final 5938 value t4. 
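      As a concrete (illustrative) case, for an array of Ity_I64
      elements the B-side IR generated here is roughly

         t1 = GetI(descr_b)[ix,bias]      :: I64   (shadow array read)
         t2 = 64to32(t1)                  :: I32   (narrowTo32)
         t3 = <origin computed for ix>    :: I32   (schemeE(ix))
         t4 = Max32U(t2,t3)               :: I32   (gen_maxU32)

      and t4 is returned as the origin of the whole GetI.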
*/ 5939 t1 = assignNew( 'B', mce, equivIntTy, 5940 IRExpr_GetI( descr_b, e->Iex.GetI.ix, 5941 e->Iex.GetI.bias )); 5942 t2 = narrowTo32( mce, t1 ); 5943 t3 = schemeE( mce, e->Iex.GetI.ix ); 5944 t4 = gen_maxU32( mce, t2, t3 ); 5945 return t4; 5946 } 5947 case Iex_CCall: { 5948 Int i; 5949 IRAtom* here; 5950 IRExpr** args = e->Iex.CCall.args; 5951 IRAtom* curr = mkU32(0); 5952 for (i = 0; args[i]; i++) { 5953 tl_assert(i < 32); 5954 tl_assert(isOriginalAtom(mce, args[i])); 5955 /* Only take notice of this arg if the callee's 5956 mc-exclusion mask does not say it is to be excluded. */ 5957 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) { 5958 /* the arg is to be excluded from definedness checking. 5959 Do nothing. */ 5960 if (0) VG_(printf)("excluding %s(%d)\n", 5961 e->Iex.CCall.cee->name, i); 5962 } else { 5963 /* calculate the arg's definedness, and pessimistically 5964 merge it in. */ 5965 here = schemeE( mce, args[i] ); 5966 curr = gen_maxU32( mce, curr, here ); 5967 } 5968 } 5969 return curr; 5970 } 5971 case Iex_Load: { 5972 Int dszB; 5973 dszB = sizeofIRType(e->Iex.Load.ty); 5974 /* assert that the B value for the address is already 5975 available (somewhere) */ 5976 tl_assert(isIRAtom(e->Iex.Load.addr)); 5977 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); 5978 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); 5979 } 5980 case Iex_Mux0X: { 5981 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond ); 5982 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 ); 5983 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX ); 5984 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 )); 5985 } 5986 case Iex_Qop: { 5987 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 ); 5988 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 ); 5989 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 ); 5990 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 ); 5991 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ), 5992 gen_maxU32( mce, b3, b4 ) ); 5993 } 5994 case Iex_Triop: { 5995 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 ); 5996 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 ); 5997 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 ); 5998 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) ); 5999 } 6000 case Iex_Binop: { 6001 switch (e->Iex.Binop.op) { 6002 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 6003 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 6004 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 6005 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 6006 /* Just say these all produce a defined result, 6007 regardless of their arguments. See 6008 COMMENT_ON_CasCmpEQ in this file. */ 6009 return mkU32(0); 6010 default: { 6011 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 ); 6012 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 ); 6013 return gen_maxU32( mce, b1, b2 ); 6014 } 6015 } 6016 tl_assert(0); 6017 /*NOTREACHED*/ 6018 } 6019 case Iex_Unop: { 6020 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg ); 6021 return b1; 6022 } 6023 case Iex_Const: 6024 return mkU32(0); 6025 case Iex_RdTmp: 6026 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp )); 6027 case Iex_Get: { 6028 Int b_offset = MC_(get_otrack_shadow_offset)( 6029 e->Iex.Get.offset, 6030 sizeofIRType(e->Iex.Get.ty) 6031 ); 6032 tl_assert(b_offset >= -1 6033 && b_offset <= mce->layout->total_sizeB -4); 6034 if (b_offset >= 0) { 6035 /* FIXME: this isn't an atom! 
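         An "atom" here means an IRExpr_RdTmp or IRExpr_Const, so a bare
         Get does not qualify, and callers may end up building non-flat
         trees around it.  A possible fix, sketched on the assumption
         that assignNew behaves here as it does elsewhere in this file,
         would be to bind the Get to a temp first:

            return assignNew(
                      'B', mce, Ity_I32,
                      IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                                  Ity_I32 ) );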
*/ 6036 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, 6037 Ity_I32 ); 6038 } 6039 return mkU32(0); 6040 } 6041 default: 6042 VG_(printf)("mc_translate.c: schemeE: unhandled: "); 6043 ppIRExpr(e); 6044 VG_(tool_panic)("memcheck:schemeE"); 6045 } 6046} 6047 6048 6049static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) 6050{ 6051 // This is a hacked version of do_shadow_Dirty 6052 Int i, k, n, toDo, gSz, gOff; 6053 IRAtom *here, *curr; 6054 IRTemp dst; 6055 6056 /* First check the guard. */ 6057 curr = schemeE( mce, d->guard ); 6058 6059 /* Now round up all inputs and maxU32 over them. */ 6060 6061 /* Inputs: unmasked args 6062 Note: arguments are evaluated REGARDLESS of the guard expression */ 6063 for (i = 0; d->args[i]; i++) { 6064 if (d->cee->mcx_mask & (1<<i)) { 6065 /* ignore this arg */ 6066 } else { 6067 here = schemeE( mce, d->args[i] ); 6068 curr = gen_maxU32( mce, curr, here ); 6069 } 6070 } 6071 6072 /* Inputs: guest state that we read. */ 6073 for (i = 0; i < d->nFxState; i++) { 6074 tl_assert(d->fxState[i].fx != Ifx_None); 6075 if (d->fxState[i].fx == Ifx_Write) 6076 continue; 6077 6078 /* Enumerate the described state segments */ 6079 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6080 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6081 gSz = d->fxState[i].size; 6082 6083 /* Ignore any sections marked as 'always defined'. */ 6084 if (isAlwaysDefd(mce, gOff, gSz)) { 6085 if (0) 6086 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 6087 gOff, gSz); 6088 continue; 6089 } 6090 6091 /* This state element is read or modified. So we need to 6092 consider it. If larger than 4 bytes, deal with it in 6093 4-byte chunks. */ 6094 while (True) { 6095 Int b_offset; 6096 tl_assert(gSz >= 0); 6097 if (gSz == 0) break; 6098 n = gSz <= 4 ? gSz : 4; 6099 /* update 'curr' with maxU32 of the state slice 6100 gOff .. gOff+n-1 */ 6101 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6102 if (b_offset != -1) { 6103 /* Observe the guard expression. If it is false use 0, i.e. 6104 nothing is known about the origin */ 6105 IRAtom *cond, *iffalse, *iftrue; 6106 6107 cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard)); 6108 iffalse = mkU32(0); 6109 iftrue = assignNew( 'B', mce, Ity_I32, 6110 IRExpr_Get(b_offset 6111 + 2*mce->layout->total_sizeB, 6112 Ity_I32)); 6113 here = assignNew( 'B', mce, Ity_I32, 6114 IRExpr_Mux0X(cond, iffalse, iftrue)); 6115 curr = gen_maxU32( mce, curr, here ); 6116 } 6117 gSz -= n; 6118 gOff += n; 6119 } 6120 } 6121 } 6122 6123 /* Inputs: memory */ 6124 6125 if (d->mFx != Ifx_None) { 6126 /* Because we may do multiple shadow loads/stores from the same 6127 base address, it's best to do a single test of its 6128 definedness right now. Post-instrumentation optimisation 6129 should remove all but this test. */ 6130 tl_assert(d->mAddr); 6131 here = schemeE( mce, d->mAddr ); 6132 curr = gen_maxU32( mce, curr, here ); 6133 } 6134 6135 /* Deal with memory inputs (reads or modifies) */ 6136 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 6137 toDo = d->mSize; 6138 /* chew off 32-bit chunks. We don't care about the endianness 6139 since it's all going to be condensed down to a single bit, 6140 but nevertheless choose an endianness which is hopefully 6141 native to the platform. 
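      Worked example (illustrative): for d->mSize == 7 this issues
      gen_guarded_load_b calls of 4 bytes at offset 0, 2 bytes at
      offset 4 and 1 byte at offset 6, each folded into 'curr' with
      Max32U.  Strictly, what is being combined here is 32-bit origin
      tags rather than single V bits, but the endianness point is the
      same.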
*/ 6142 while (toDo >= 4) { 6143 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo, 6144 d->guard ); 6145 curr = gen_maxU32( mce, curr, here ); 6146 toDo -= 4; 6147 } 6148 /* handle possible 16-bit excess */ 6149 while (toDo >= 2) { 6150 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo, 6151 d->guard ); 6152 curr = gen_maxU32( mce, curr, here ); 6153 toDo -= 2; 6154 } 6155 /* chew off the remaining 8-bit chunk, if any */ 6156 if (toDo == 1) { 6157 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo, 6158 d->guard ); 6159 curr = gen_maxU32( mce, curr, here ); 6160 toDo -= 1; 6161 } 6162 tl_assert(toDo == 0); 6163 } 6164 6165 /* Whew! So curr is a 32-bit B-value which should give an origin 6166 of some use if any of the inputs to the helper are undefined. 6167 Now we need to re-distribute the results to all destinations. */ 6168 6169 /* Outputs: the destination temporary, if there is one. */ 6170 if (d->tmp != IRTemp_INVALID) { 6171 dst = findShadowTmpB(mce, d->tmp); 6172 assign( 'V', mce, dst, curr ); 6173 } 6174 6175 /* Outputs: guest state that we write or modify. */ 6176 for (i = 0; i < d->nFxState; i++) { 6177 tl_assert(d->fxState[i].fx != Ifx_None); 6178 if (d->fxState[i].fx == Ifx_Read) 6179 continue; 6180 6181 /* Enumerate the described state segments */ 6182 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6183 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6184 gSz = d->fxState[i].size; 6185 6186 /* Ignore any sections marked as 'always defined'. */ 6187 if (isAlwaysDefd(mce, gOff, gSz)) 6188 continue; 6189 6190 /* This state element is written or modified. So we need to 6191 consider it. If larger than 4 bytes, deal with it in 6192 4-byte chunks. */ 6193 while (True) { 6194 Int b_offset; 6195 tl_assert(gSz >= 0); 6196 if (gSz == 0) break; 6197 n = gSz <= 4 ? gSz : 4; 6198 /* Write 'curr' to the state slice gOff .. gOff+n-1 */ 6199 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6200 if (b_offset != -1) { 6201 if (d->guard) { 6202 /* If the guard expression evaluates to false we simply Put 6203 the value that is already stored in the guest state slot */ 6204 IRAtom *cond, *iffalse; 6205 6206 cond = assignNew('B', mce, Ity_I8, 6207 unop(Iop_1Uto8, d->guard)); 6208 iffalse = assignNew('B', mce, Ity_I32, 6209 IRExpr_Get(b_offset + 6210 2*mce->layout->total_sizeB, 6211 Ity_I32)); 6212 curr = assignNew('V', mce, Ity_I32, 6213 IRExpr_Mux0X(cond, iffalse, curr)); 6214 } 6215 stmt( 'B', mce, IRStmt_Put(b_offset 6216 + 2*mce->layout->total_sizeB, 6217 curr )); 6218 } 6219 gSz -= n; 6220 gOff += n; 6221 } 6222 } 6223 } 6224 6225 /* Outputs: memory that we write or modify. Same comments about 6226 endianness as above apply. 
*/ 6227 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 6228 toDo = d->mSize; 6229 /* chew off 32-bit chunks */ 6230 while (toDo >= 4) { 6231 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr, 6232 d->guard ); 6233 toDo -= 4; 6234 } 6235 /* handle possible 16-bit excess */ 6236 while (toDo >= 2) { 6237 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr, 6238 d->guard ); 6239 toDo -= 2; 6240 } 6241 /* chew off the remaining 8-bit chunk, if any */ 6242 if (toDo == 1) { 6243 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr, 6244 d->guard ); 6245 toDo -= 1; 6246 } 6247 tl_assert(toDo == 0); 6248 } 6249} 6250 6251 6252static void do_origins_Store ( MCEnv* mce, 6253 IREndness stEnd, 6254 IRExpr* stAddr, 6255 IRExpr* stData ) 6256{ 6257 Int dszB; 6258 IRAtom* dataB; 6259 /* assert that the B value for the address is already available 6260 (somewhere), since the call to schemeE will want to see it. 6261 XXXX how does this actually ensure that?? */ 6262 tl_assert(isIRAtom(stAddr)); 6263 tl_assert(isIRAtom(stData)); 6264 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) ); 6265 dataB = schemeE( mce, stData ); 6266 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, 6267 NULL/*guard*/ ); 6268} 6269 6270 6271static void schemeS ( MCEnv* mce, IRStmt* st ) 6272{ 6273 tl_assert(MC_(clo_mc_level) == 3); 6274 6275 switch (st->tag) { 6276 6277 case Ist_AbiHint: 6278 /* The value-check instrumenter handles this - by arranging 6279 to pass the address of the next instruction to 6280 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to 6281 happen for origin tracking w.r.t. AbiHints. So there is 6282 nothing to do here. */ 6283 break; 6284 6285 case Ist_PutI: { 6286 IRPutI *puti = st->Ist.PutI.details; 6287 IRRegArray* descr_b; 6288 IRAtom *t1, *t2, *t3, *t4; 6289 IRRegArray* descr = puti->descr; 6290 IRType equivIntTy 6291 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 6292 /* If this array is unshadowable for whatever reason, 6293 generate no code. */ 6294 if (equivIntTy == Ity_INVALID) 6295 break; 6296 tl_assert(sizeofIRType(equivIntTy) >= 4); 6297 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 6298 descr_b 6299 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 6300 equivIntTy, descr->nElems ); 6301 /* Compute a value to Put - the conjoinment of the origin for 6302 the data to be Put-ted (obviously) and of the index value 6303 (not so obviously). */ 6304 t1 = schemeE( mce, puti->data ); 6305 t2 = schemeE( mce, puti->ix ); 6306 t3 = gen_maxU32( mce, t1, t2 ); 6307 t4 = zWidenFrom32( mce, equivIntTy, t3 ); 6308 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix, 6309 puti->bias, t4) )); 6310 break; 6311 } 6312 6313 case Ist_Dirty: 6314 do_origins_Dirty( mce, st->Ist.Dirty.details ); 6315 break; 6316 6317 case Ist_Store: 6318 do_origins_Store( mce, st->Ist.Store.end, 6319 st->Ist.Store.addr, 6320 st->Ist.Store.data ); 6321 break; 6322 6323 case Ist_LLSC: { 6324 /* In short: treat a load-linked like a normal load followed 6325 by an assignment of the loaded (shadow) data the result 6326 temporary. Treat a store-conditional like a normal store, 6327 and mark the result temporary as defined. 
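      For example (a sketch, in approximate VEX-IR notation): for a
      32-bit load-linked

         t3 = LDle-Linked(t1)

      the origin of t3 is computed as schemeE of a plain 32-bit load
      from t1, i.e. a gen_load_b of 4 bytes; for a store-conditional

         t3 = ( STle-Cond(t1) = t2 )

      the origin of t2 is written out with gen_store_b, and
      shadow-B(t3) is set to 0x0:I32, i.e. no-origin / defined.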
*/ 6328 if (st->Ist.LLSC.storedata == NULL) { 6329 /* Load Linked */ 6330 IRType resTy 6331 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result); 6332 IRExpr* vanillaLoad 6333 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr); 6334 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 6335 || resTy == Ity_I16 || resTy == Ity_I8); 6336 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), 6337 schemeE(mce, vanillaLoad)); 6338 } else { 6339 /* Store conditional */ 6340 do_origins_Store( mce, st->Ist.LLSC.end, 6341 st->Ist.LLSC.addr, 6342 st->Ist.LLSC.storedata ); 6343 /* For the rationale behind this, see comments at the 6344 place where the V-shadow for .result is constructed, in 6345 do_shadow_LLSC. In short, we regard .result as 6346 always-defined. */ 6347 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), 6348 mkU32(0) ); 6349 } 6350 break; 6351 } 6352 6353 case Ist_Put: { 6354 Int b_offset 6355 = MC_(get_otrack_shadow_offset)( 6356 st->Ist.Put.offset, 6357 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data)) 6358 ); 6359 if (b_offset >= 0) { 6360 /* FIXME: this isn't an atom! */ 6361 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB, 6362 schemeE( mce, st->Ist.Put.data )) ); 6363 } 6364 break; 6365 } 6366 6367 case Ist_WrTmp: 6368 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp), 6369 schemeE(mce, st->Ist.WrTmp.data) ); 6370 break; 6371 6372 case Ist_MBE: 6373 case Ist_NoOp: 6374 case Ist_Exit: 6375 case Ist_IMark: 6376 break; 6377 6378 default: 6379 VG_(printf)("mc_translate.c: schemeS: unhandled: "); 6380 ppIRStmt(st); 6381 VG_(tool_panic)("memcheck:schemeS"); 6382 } 6383} 6384 6385 6386/*--------------------------------------------------------------------*/ 6387/*--- end mc_translate.c ---*/ 6388/*--------------------------------------------------------------------*/ 6389