mc_translate.c revision e739ac0589b4fb43561f801c4faba8c1b89f8680
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.        ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2010 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are
   and should be checked, why, and what function is responsible.


   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]

   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
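/* Added illustrative note (annotation, not in the original source):
   a minimal sketch of how the orig->shadow remapping plays out.
   Suppose the original IR contains a tmp t5.  The first call to
   findShadowTmpV(mce, t5) allocates, say, shadow tmp t17 and records
   t5 -> t17 in tmpMap.  If t5 is later tested for definedness by
   complainIfUndefined, the instrumenter calls newShadowTmpV(mce, t5),
   which abandons t17, allocates a fresh shadow (say t23), updates the
   map to t5 -> t23, and then assigns 'all defined' to t23.  Each
   shadow tmp is still written exactly once, so the IR stays in SSA
   form.  The tmp numbers here are made up purely for illustration. */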
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128 and V128. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
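/* Added annotation (not in the original source): throughout this file
   the V-bit (definedness) convention is that 0 means "defined" and 1
   means "undefined", bit for bit.  So the all-zeroes shadow values
   produced by definedOfType below mark every bit of the original
   value as defined, while (for example) a 32-bit value whose low byte
   is uninitialised would carry the shadow 0x000000FF. */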
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */
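/* Added annotation (not in the original source): the Iop_LeftN ops
   used below are defined in VEX's libvex_ir.h as Left(x) = x | -x,
   i.e. they keep the lowest set bit and smear it leftwards through
   all higher bit positions.  Applied to a shadow value, this turns
   "some bit at position k is undefined" into "every bit at or above
   the lowest undefined bit is undefined" -- a cheap, conservative way
   to propagate undefinedness through operations such as additions,
   where an undefined low bit can corrupt, via carries, everything to
   its left. */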
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
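/* Added worked example (annotation, not in the original source),
   considered per bit for And(x, y): if the x bit is a *defined zero*
   (x bit = 0, vx bit = 0), the AND result bit is 0 no matter what y
   holds, so it is safe to call that bit defined even if the
   corresponding y bit is undefined.  ImproveAND(x, vx) = x | vx
   yields 0 ("can force definedness") exactly in that case, and 1
   everywhere else.  Dually, a defined *one* forces the result of an
   OR, so ImproveOR(x, vx) = ~x | vx yields 0 exactly where the x bit
   is a defined 1.  These improvement terms are later DifD'd (ANDed)
   into the naive UifU-based shadow result, so they can only ever make
   bits more defined, never less. */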
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_U1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
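/* Added worked example (annotation, not in the original source), on
   4-bit values for brevity.  Let xx = 1?00 and yy = 0?00, where '?'
   marks an undefined bit, so vxx = vyy = 0100.  The naive term
   PCast(UifU(vxx,vyy)) = PCast(0100) = 1111, i.e. "result undefined".
   But bit 3 is defined in both args and differs (1 vs 0), so the
   equality is already decided.  Indeed
   vec = 0100 | 0100 | Not(xx ^ yy) = 0111 whatever the '?' bits hold,
   so CmpEQ(vec, 1111) is False, the improvement term is 0000, and
   DifD (AND) with the naive 1111 gives 0000: the comparison result is
   reported as defined, as it should be. */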
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}


/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in
   disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).  If the state slice is marked 'always defined', do
   nothing.  Otherwise, write the supplied V bits to the shadow state.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr,
                      IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}


/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                                  assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                                  assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}


/*------------------------------------------------------------*/
/*--- Scalar shifts.                                        ---*/
/*------------------------------------------------------------*/

/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
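/* Added illustrative example (annotation, not in the original
   source): for Shl32(x, 3) the shadow result is
   (vx << 3) `UifU` PCast32(v3).  Shifting vx left by 3 brings in zero
   (defined) V bits at the bottom, mirroring the real shift, which
   brings in real zero bits there.  Because the literal 3 is shadowed
   by the constant 0 (all defined), PCast32 of it is 0 and the UifU
   term adds nothing, so after IR optimisation only the plain
   (vx << 3) remains. */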
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
   );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}

static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x,y)    ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.
1760 1761 Finally: 1762 1763 unary32F0x4(x) ==> SetV128lo32( 1764 x#, 1765 PCast32(V128to32(x#)) 1766 ) 1767 1768 Where: 1769 1770 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 1771 PCast32x4(v#) = CmpNEZ32x4(v#) 1772*/ 1773 1774static 1775IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1776{ 1777 IRAtom* at; 1778 tl_assert(isShadowAtom(mce, vatomX)); 1779 tl_assert(isShadowAtom(mce, vatomY)); 1780 at = mkUifUV128(mce, vatomX, vatomY); 1781 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 1782 return at; 1783} 1784 1785static 1786IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 1787{ 1788 IRAtom* at; 1789 tl_assert(isShadowAtom(mce, vatomX)); 1790 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 1791 return at; 1792} 1793 1794static 1795IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1796{ 1797 IRAtom* at; 1798 tl_assert(isShadowAtom(mce, vatomX)); 1799 tl_assert(isShadowAtom(mce, vatomY)); 1800 at = mkUifUV128(mce, vatomX, vatomY); 1801 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 1802 at = mkPCastTo(mce, Ity_I32, at); 1803 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1804 return at; 1805} 1806 1807static 1808IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 1809{ 1810 IRAtom* at; 1811 tl_assert(isShadowAtom(mce, vatomX)); 1812 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 1813 at = mkPCastTo(mce, Ity_I32, at); 1814 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1815 return at; 1816} 1817 1818/* --- ... and ... 64Fx2 versions of the same ... --- */ 1819 1820static 1821IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1822{ 1823 IRAtom* at; 1824 tl_assert(isShadowAtom(mce, vatomX)); 1825 tl_assert(isShadowAtom(mce, vatomY)); 1826 at = mkUifUV128(mce, vatomX, vatomY); 1827 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 1828 return at; 1829} 1830 1831static 1832IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 1833{ 1834 IRAtom* at; 1835 tl_assert(isShadowAtom(mce, vatomX)); 1836 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 1837 return at; 1838} 1839 1840static 1841IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1842{ 1843 IRAtom* at; 1844 tl_assert(isShadowAtom(mce, vatomX)); 1845 tl_assert(isShadowAtom(mce, vatomY)); 1846 at = mkUifUV128(mce, vatomX, vatomY); 1847 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 1848 at = mkPCastTo(mce, Ity_I64, at); 1849 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 1850 return at; 1851} 1852 1853static 1854IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 1855{ 1856 IRAtom* at; 1857 tl_assert(isShadowAtom(mce, vatomX)); 1858 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 1859 at = mkPCastTo(mce, Ity_I64, at); 1860 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 1861 return at; 1862} 1863 1864/* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 1865 1866static 1867IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1868{ 1869 IRAtom* at; 1870 tl_assert(isShadowAtom(mce, vatomX)); 1871 tl_assert(isShadowAtom(mce, vatomY)); 1872 at = mkUifU64(mce, vatomX, vatomY); 1873 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 1874 return at; 1875} 1876 1877static 1878IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 1879{ 1880 IRAtom* at; 1881 tl_assert(isShadowAtom(mce, vatomX)); 1882 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 1883 return at; 1884} 1885 1886/* --- --- Vector saturated narrowing --- --- */ 1887 1888/* This is quite subtle. What to do is simple: 1889 1890 Let the original narrowing op be QNarrowW{S,U}xN. Produce: 1891 1892 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2)) 1893 1894 Why this is right is not so simple. Consider a lane in the args, 1895 vatom1 or 2, doesn't matter. 1896 1897 After the PCast, that lane is all 0s (defined) or all 1898 1s(undefined). 1899 1900 Both signed and unsigned saturating narrowing of all 0s produces 1901 all 0s, which is what we want. 1902 1903 The all-1s case is more complex. Unsigned narrowing interprets an 1904 all-1s input as the largest unsigned integer, and so produces all 1905 1s as a result since that is the largest unsigned value at the 1906 smaller width. 1907 1908 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows 1909 to -1, so we still wind up with all 1s at the smaller width. 1910 1911 So: In short, pessimise the args, then apply the original narrowing 1912 op. 1913*/ 1914static 1915IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op, 1916 IRAtom* vatom1, IRAtom* vatom2) 1917{ 1918 IRAtom *at1, *at2, *at3; 1919 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 1920 switch (narrow_op) { 1921 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break; 1922 case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break; 1923 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break; 1924 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break; 1925 default: VG_(tool_panic)("vectorNarrowV128"); 1926 } 1927 tl_assert(isShadowAtom(mce,vatom1)); 1928 tl_assert(isShadowAtom(mce,vatom2)); 1929 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 1930 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 1931 at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2)); 1932 return at3; 1933} 1934 1935static 1936IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op, 1937 IRAtom* vatom1, IRAtom* vatom2) 1938{ 1939 IRAtom *at1, *at2, *at3; 1940 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 1941 switch (narrow_op) { 1942 case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break; 1943 case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break; 1944 case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break; 1945 default: VG_(tool_panic)("vectorNarrow64"); 1946 } 1947 tl_assert(isShadowAtom(mce,vatom1)); 1948 tl_assert(isShadowAtom(mce,vatom2)); 1949 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 1950 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2)); 1951 at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2)); 1952 return at3; 1953} 1954 1955static 1956IRAtom* vectorShortenV128 ( MCEnv* mce, IROp shorten_op, 1957 IRAtom* vatom1) 1958{ 1959 IRAtom *at1, *at2; 1960 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 1961 switch (shorten_op) { 1962 case Iop_Shorten16x8: pcast = mkPCast16x8; break; 1963 case Iop_Shorten32x4: pcast = mkPCast32x4; break; 1964 case Iop_Shorten64x2: pcast = mkPCast64x2; break; 1965 case Iop_QShortenS16Sx8: pcast = mkPCast16x8; break; 
1966 case Iop_QShortenU16Sx8: pcast = mkPCast16x8; break; 1967 case Iop_QShortenU16Ux8: pcast = mkPCast16x8; break; 1968 case Iop_QShortenS32Sx4: pcast = mkPCast32x4; break; 1969 case Iop_QShortenU32Sx4: pcast = mkPCast32x4; break; 1970 case Iop_QShortenU32Ux4: pcast = mkPCast32x4; break; 1971 case Iop_QShortenS64Sx2: pcast = mkPCast64x2; break; 1972 case Iop_QShortenU64Sx2: pcast = mkPCast64x2; break; 1973 case Iop_QShortenU64Ux2: pcast = mkPCast64x2; break; 1974 default: VG_(tool_panic)("vectorShortenV128"); 1975 } 1976 tl_assert(isShadowAtom(mce,vatom1)); 1977 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 1978 at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1)); 1979 return at2; 1980} 1981 1982static 1983IRAtom* vectorLongenI64 ( MCEnv* mce, IROp longen_op, 1984 IRAtom* vatom1) 1985{ 1986 IRAtom *at1, *at2; 1987 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 1988 switch (longen_op) { 1989 case Iop_Longen8Ux8: pcast = mkPCast16x8; break; 1990 case Iop_Longen8Sx8: pcast = mkPCast16x8; break; 1991 case Iop_Longen16Ux4: pcast = mkPCast32x4; break; 1992 case Iop_Longen16Sx4: pcast = mkPCast32x4; break; 1993 case Iop_Longen32Ux2: pcast = mkPCast64x2; break; 1994 case Iop_Longen32Sx2: pcast = mkPCast64x2; break; 1995 default: VG_(tool_panic)("vectorLongenI64"); 1996 } 1997 tl_assert(isShadowAtom(mce,vatom1)); 1998 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 1999 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2000 return at2; 2001} 2002 2003 2004/* --- --- Vector integer arithmetic --- --- */ 2005 2006/* Simple ... UifU the args and per-lane pessimise the results. */ 2007 2008/* --- V128-bit versions --- */ 2009 2010static 2011IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2012{ 2013 IRAtom* at; 2014 at = mkUifUV128(mce, vatom1, vatom2); 2015 at = mkPCast8x16(mce, at); 2016 return at; 2017} 2018 2019static 2020IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2021{ 2022 IRAtom* at; 2023 at = mkUifUV128(mce, vatom1, vatom2); 2024 at = mkPCast16x8(mce, at); 2025 return at; 2026} 2027 2028static 2029IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2030{ 2031 IRAtom* at; 2032 at = mkUifUV128(mce, vatom1, vatom2); 2033 at = mkPCast32x4(mce, at); 2034 return at; 2035} 2036 2037static 2038IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2039{ 2040 IRAtom* at; 2041 at = mkUifUV128(mce, vatom1, vatom2); 2042 at = mkPCast64x2(mce, at); 2043 return at; 2044} 2045 2046/* --- 64-bit versions --- */ 2047 2048static 2049IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2050{ 2051 IRAtom* at; 2052 at = mkUifU64(mce, vatom1, vatom2); 2053 at = mkPCast8x8(mce, at); 2054 return at; 2055} 2056 2057static 2058IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2059{ 2060 IRAtom* at; 2061 at = mkUifU64(mce, vatom1, vatom2); 2062 at = mkPCast16x4(mce, at); 2063 return at; 2064} 2065 2066static 2067IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2068{ 2069 IRAtom* at; 2070 at = mkUifU64(mce, vatom1, vatom2); 2071 at = mkPCast32x2(mce, at); 2072 return at; 2073} 2074 2075static 2076IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2077{ 2078 IRAtom* at; 2079 at = mkUifU64(mce, vatom1, vatom2); 2080 at = mkPCastTo(mce, Ity_I64, at); 2081 return at; 2082} 2083 2084/* --- 32-bit versions --- */ 2085 2086static 2087IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2088{ 2089 IRAtom* at; 2090 at = mkUifU32(mce, vatom1, vatom2); 2091 at = 
mkPCast8x4(mce, at); 2092 return at; 2093} 2094 2095static 2096IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2097{ 2098 IRAtom* at; 2099 at = mkUifU32(mce, vatom1, vatom2); 2100 at = mkPCast16x2(mce, at); 2101 return at; 2102} 2103 2104 2105/*------------------------------------------------------------*/ 2106/*--- Generate shadow values from all kinds of IRExprs. ---*/ 2107/*------------------------------------------------------------*/ 2108 2109static 2110IRAtom* expr2vbits_Qop ( MCEnv* mce, 2111 IROp op, 2112 IRAtom* atom1, IRAtom* atom2, 2113 IRAtom* atom3, IRAtom* atom4 ) 2114{ 2115 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2116 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2117 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2118 IRAtom* vatom4 = expr2vbits( mce, atom4 ); 2119 2120 tl_assert(isOriginalAtom(mce,atom1)); 2121 tl_assert(isOriginalAtom(mce,atom2)); 2122 tl_assert(isOriginalAtom(mce,atom3)); 2123 tl_assert(isOriginalAtom(mce,atom4)); 2124 tl_assert(isShadowAtom(mce,vatom1)); 2125 tl_assert(isShadowAtom(mce,vatom2)); 2126 tl_assert(isShadowAtom(mce,vatom3)); 2127 tl_assert(isShadowAtom(mce,vatom4)); 2128 tl_assert(sameKindedAtoms(atom1,vatom1)); 2129 tl_assert(sameKindedAtoms(atom2,vatom2)); 2130 tl_assert(sameKindedAtoms(atom3,vatom3)); 2131 tl_assert(sameKindedAtoms(atom4,vatom4)); 2132 switch (op) { 2133 case Iop_MAddF64: 2134 case Iop_MAddF64r32: 2135 case Iop_MSubF64: 2136 case Iop_MSubF64r32: 2137 /* I32(rm) x F64 x F64 x F64 -> F64 */ 2138 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4); 2139 default: 2140 ppIROp(op); 2141 VG_(tool_panic)("memcheck:expr2vbits_Qop"); 2142 } 2143} 2144 2145 2146static 2147IRAtom* expr2vbits_Triop ( MCEnv* mce, 2148 IROp op, 2149 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) 2150{ 2151 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2152 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2153 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2154 2155 tl_assert(isOriginalAtom(mce,atom1)); 2156 tl_assert(isOriginalAtom(mce,atom2)); 2157 tl_assert(isOriginalAtom(mce,atom3)); 2158 tl_assert(isShadowAtom(mce,vatom1)); 2159 tl_assert(isShadowAtom(mce,vatom2)); 2160 tl_assert(isShadowAtom(mce,vatom3)); 2161 tl_assert(sameKindedAtoms(atom1,vatom1)); 2162 tl_assert(sameKindedAtoms(atom2,vatom2)); 2163 tl_assert(sameKindedAtoms(atom3,vatom3)); 2164 switch (op) { 2165 case Iop_AddF64: 2166 case Iop_AddF64r32: 2167 case Iop_SubF64: 2168 case Iop_SubF64r32: 2169 case Iop_MulF64: 2170 case Iop_MulF64r32: 2171 case Iop_DivF64: 2172 case Iop_DivF64r32: 2173 case Iop_ScaleF64: 2174 case Iop_Yl2xF64: 2175 case Iop_Yl2xp1F64: 2176 case Iop_AtanF64: 2177 case Iop_PRemF64: 2178 case Iop_PRem1F64: 2179 /* I32(rm) x F64 x F64 -> F64 */ 2180 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2181 case Iop_PRemC3210F64: 2182 case Iop_PRem1C3210F64: 2183 /* I32(rm) x F64 x F64 -> I32 */ 2184 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2185 case Iop_AddF32: 2186 case Iop_SubF32: 2187 case Iop_MulF32: 2188 case Iop_DivF32: 2189 /* I32(rm) x F32 x F32 -> I32 */ 2190 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2191 case Iop_ExtractV128: 2192 complainIfUndefined(mce, atom3); 2193 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2194 case Iop_Extract64: 2195 complainIfUndefined(mce, atom3); 2196 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2197 case Iop_SetElem8x8: 2198 case Iop_SetElem16x4: 2199 case Iop_SetElem32x2: 2200 complainIfUndefined(mce, atom2); 2201 return assignNew('V', mce, 
Ity_I64, triop(op, vatom1, atom2, vatom3)); 2202 default: 2203 ppIROp(op); 2204 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2205 } 2206} 2207 2208 2209static 2210IRAtom* expr2vbits_Binop ( MCEnv* mce, 2211 IROp op, 2212 IRAtom* atom1, IRAtom* atom2 ) 2213{ 2214 IRType and_or_ty; 2215 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2216 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2217 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2218 2219 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2220 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2221 2222 tl_assert(isOriginalAtom(mce,atom1)); 2223 tl_assert(isOriginalAtom(mce,atom2)); 2224 tl_assert(isShadowAtom(mce,vatom1)); 2225 tl_assert(isShadowAtom(mce,vatom2)); 2226 tl_assert(sameKindedAtoms(atom1,vatom1)); 2227 tl_assert(sameKindedAtoms(atom2,vatom2)); 2228 switch (op) { 2229 2230 /* 32-bit SIMD */ 2231 2232 case Iop_Add16x2: 2233 case Iop_HAdd16Ux2: 2234 case Iop_HAdd16Sx2: 2235 case Iop_Sub16x2: 2236 case Iop_HSub16Ux2: 2237 case Iop_HSub16Sx2: 2238 case Iop_QAdd16Sx2: 2239 case Iop_QSub16Sx2: 2240 return binary16Ix2(mce, vatom1, vatom2); 2241 2242 case Iop_Add8x4: 2243 case Iop_HAdd8Ux4: 2244 case Iop_HAdd8Sx4: 2245 case Iop_Sub8x4: 2246 case Iop_HSub8Ux4: 2247 case Iop_HSub8Sx4: 2248 case Iop_QSub8Ux4: 2249 case Iop_QAdd8Ux4: 2250 case Iop_QSub8Sx4: 2251 case Iop_QAdd8Sx4: 2252 return binary8Ix4(mce, vatom1, vatom2); 2253 2254 /* 64-bit SIMD */ 2255 2256 case Iop_ShrN8x8: 2257 case Iop_ShrN16x4: 2258 case Iop_ShrN32x2: 2259 case Iop_SarN8x8: 2260 case Iop_SarN16x4: 2261 case Iop_SarN32x2: 2262 case Iop_ShlN16x4: 2263 case Iop_ShlN32x2: 2264 case Iop_ShlN8x8: 2265 /* Same scheme as with all other shifts. */ 2266 complainIfUndefined(mce, atom2); 2267 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2268 2269 case Iop_QNarrow32Sx2: 2270 case Iop_QNarrow16Sx4: 2271 case Iop_QNarrow16Ux4: 2272 return vectorNarrow64(mce, op, vatom1, vatom2); 2273 2274 case Iop_Min8Ux8: 2275 case Iop_Min8Sx8: 2276 case Iop_Max8Ux8: 2277 case Iop_Max8Sx8: 2278 case Iop_Avg8Ux8: 2279 case Iop_QSub8Sx8: 2280 case Iop_QSub8Ux8: 2281 case Iop_Sub8x8: 2282 case Iop_CmpGT8Sx8: 2283 case Iop_CmpGT8Ux8: 2284 case Iop_CmpEQ8x8: 2285 case Iop_QAdd8Sx8: 2286 case Iop_QAdd8Ux8: 2287 case Iop_QSal8x8: 2288 case Iop_QShl8x8: 2289 case Iop_Add8x8: 2290 case Iop_Mul8x8: 2291 case Iop_PolynomialMul8x8: 2292 return binary8Ix8(mce, vatom1, vatom2); 2293 2294 case Iop_Min16Sx4: 2295 case Iop_Min16Ux4: 2296 case Iop_Max16Sx4: 2297 case Iop_Max16Ux4: 2298 case Iop_Avg16Ux4: 2299 case Iop_QSub16Ux4: 2300 case Iop_QSub16Sx4: 2301 case Iop_Sub16x4: 2302 case Iop_Mul16x4: 2303 case Iop_MulHi16Sx4: 2304 case Iop_MulHi16Ux4: 2305 case Iop_CmpGT16Sx4: 2306 case Iop_CmpGT16Ux4: 2307 case Iop_CmpEQ16x4: 2308 case Iop_QAdd16Sx4: 2309 case Iop_QAdd16Ux4: 2310 case Iop_QSal16x4: 2311 case Iop_QShl16x4: 2312 case Iop_Add16x4: 2313 case Iop_QDMulHi16Sx4: 2314 case Iop_QRDMulHi16Sx4: 2315 return binary16Ix4(mce, vatom1, vatom2); 2316 2317 case Iop_Sub32x2: 2318 case Iop_Mul32x2: 2319 case Iop_Max32Sx2: 2320 case Iop_Max32Ux2: 2321 case Iop_Min32Sx2: 2322 case Iop_Min32Ux2: 2323 case Iop_CmpGT32Sx2: 2324 case Iop_CmpGT32Ux2: 2325 case Iop_CmpEQ32x2: 2326 case Iop_Add32x2: 2327 case Iop_QAdd32Ux2: 2328 case Iop_QAdd32Sx2: 2329 case Iop_QSub32Ux2: 2330 case Iop_QSub32Sx2: 2331 case Iop_QSal32x2: 2332 case Iop_QShl32x2: 2333 case Iop_QDMulHi32Sx2: 2334 case Iop_QRDMulHi32Sx2: 2335 return binary32Ix2(mce, vatom1, vatom2); 2336 2337 case Iop_QSub64Ux1: 2338 case Iop_QSub64Sx1: 2339 case 
Iop_QAdd64Ux1: 2340 case Iop_QAdd64Sx1: 2341 case Iop_QSal64x1: 2342 case Iop_QShl64x1: 2343 case Iop_Sal64x1: 2344 return binary64Ix1(mce, vatom1, vatom2); 2345 2346 case Iop_QShlN8Sx8: 2347 case Iop_QShlN8x8: 2348 case Iop_QSalN8x8: 2349 complainIfUndefined(mce, atom2); 2350 return mkPCast8x8(mce, vatom1); 2351 2352 case Iop_QShlN16Sx4: 2353 case Iop_QShlN16x4: 2354 case Iop_QSalN16x4: 2355 complainIfUndefined(mce, atom2); 2356 return mkPCast16x4(mce, vatom1); 2357 2358 case Iop_QShlN32Sx2: 2359 case Iop_QShlN32x2: 2360 case Iop_QSalN32x2: 2361 complainIfUndefined(mce, atom2); 2362 return mkPCast32x2(mce, vatom1); 2363 2364 case Iop_QShlN64Sx1: 2365 case Iop_QShlN64x1: 2366 case Iop_QSalN64x1: 2367 complainIfUndefined(mce, atom2); 2368 return mkPCast32x2(mce, vatom1); 2369 2370 case Iop_PwMax32Sx2: 2371 case Iop_PwMax32Ux2: 2372 case Iop_PwMin32Sx2: 2373 case Iop_PwMin32Ux2: 2374 case Iop_PwMax32Fx2: 2375 case Iop_PwMin32Fx2: 2376 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1), 2377 mkPCast32x2(mce, vatom2))); 2378 2379 case Iop_PwMax16Sx4: 2380 case Iop_PwMax16Ux4: 2381 case Iop_PwMin16Sx4: 2382 case Iop_PwMin16Ux4: 2383 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1), 2384 mkPCast16x4(mce, vatom2))); 2385 2386 case Iop_PwMax8Sx8: 2387 case Iop_PwMax8Ux8: 2388 case Iop_PwMin8Sx8: 2389 case Iop_PwMin8Ux8: 2390 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1), 2391 mkPCast8x8(mce, vatom2))); 2392 2393 case Iop_PwAdd32x2: 2394 case Iop_PwAdd32Fx2: 2395 return mkPCast32x2(mce, 2396 assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1), 2397 mkPCast32x2(mce, vatom2)))); 2398 2399 case Iop_PwAdd16x4: 2400 return mkPCast16x4(mce, 2401 assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1), 2402 mkPCast16x4(mce, vatom2)))); 2403 2404 case Iop_PwAdd8x8: 2405 return mkPCast8x8(mce, 2406 assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1), 2407 mkPCast8x8(mce, vatom2)))); 2408 2409 case Iop_Shl8x8: 2410 case Iop_Shr8x8: 2411 case Iop_Sar8x8: 2412 case Iop_Sal8x8: 2413 return mkUifU64(mce, 2414 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2415 mkPCast8x8(mce,vatom2) 2416 ); 2417 2418 case Iop_Shl16x4: 2419 case Iop_Shr16x4: 2420 case Iop_Sar16x4: 2421 case Iop_Sal16x4: 2422 return mkUifU64(mce, 2423 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2424 mkPCast16x4(mce,vatom2) 2425 ); 2426 2427 case Iop_Shl32x2: 2428 case Iop_Shr32x2: 2429 case Iop_Sar32x2: 2430 case Iop_Sal32x2: 2431 return mkUifU64(mce, 2432 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2433 mkPCast32x2(mce,vatom2) 2434 ); 2435 2436 /* 64-bit data-steering */ 2437 case Iop_InterleaveLO32x2: 2438 case Iop_InterleaveLO16x4: 2439 case Iop_InterleaveLO8x8: 2440 case Iop_InterleaveHI32x2: 2441 case Iop_InterleaveHI16x4: 2442 case Iop_InterleaveHI8x8: 2443 case Iop_CatOddLanes8x8: 2444 case Iop_CatEvenLanes8x8: 2445 case Iop_CatOddLanes16x4: 2446 case Iop_CatEvenLanes16x4: 2447 case Iop_InterleaveOddLanes8x8: 2448 case Iop_InterleaveEvenLanes8x8: 2449 case Iop_InterleaveOddLanes16x4: 2450 case Iop_InterleaveEvenLanes16x4: 2451 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 2452 2453 case Iop_GetElem8x8: 2454 complainIfUndefined(mce, atom2); 2455 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2456 case Iop_GetElem16x4: 2457 complainIfUndefined(mce, atom2); 2458 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2459 case 
Iop_GetElem32x2: 2460 complainIfUndefined(mce, atom2); 2461 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2462 2463 /* Perm8x8: rearrange values in left arg using steering values 2464 from right arg. So rearrange the vbits in the same way but 2465 pessimise wrt steering values. */ 2466 case Iop_Perm8x8: 2467 return mkUifU64( 2468 mce, 2469 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2470 mkPCast8x8(mce, vatom2) 2471 ); 2472 2473 /* V128-bit SIMD */ 2474 2475 case Iop_ShrN8x16: 2476 case Iop_ShrN16x8: 2477 case Iop_ShrN32x4: 2478 case Iop_ShrN64x2: 2479 case Iop_SarN8x16: 2480 case Iop_SarN16x8: 2481 case Iop_SarN32x4: 2482 case Iop_SarN64x2: 2483 case Iop_ShlN8x16: 2484 case Iop_ShlN16x8: 2485 case Iop_ShlN32x4: 2486 case Iop_ShlN64x2: 2487 /* Same scheme as with all other shifts. Note: 22 Oct 05: 2488 this is wrong now, scalar shifts are done properly lazily. 2489 Vector shifts should be fixed too. */ 2490 complainIfUndefined(mce, atom2); 2491 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 2492 2493 /* V x V shifts/rotates are done using the standard lazy scheme. */ 2494 case Iop_Shl8x16: 2495 case Iop_Shr8x16: 2496 case Iop_Sar8x16: 2497 case Iop_Sal8x16: 2498 case Iop_Rol8x16: 2499 return mkUifUV128(mce, 2500 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2501 mkPCast8x16(mce,vatom2) 2502 ); 2503 2504 case Iop_Shl16x8: 2505 case Iop_Shr16x8: 2506 case Iop_Sar16x8: 2507 case Iop_Sal16x8: 2508 case Iop_Rol16x8: 2509 return mkUifUV128(mce, 2510 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2511 mkPCast16x8(mce,vatom2) 2512 ); 2513 2514 case Iop_Shl32x4: 2515 case Iop_Shr32x4: 2516 case Iop_Sar32x4: 2517 case Iop_Sal32x4: 2518 case Iop_Rol32x4: 2519 return mkUifUV128(mce, 2520 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2521 mkPCast32x4(mce,vatom2) 2522 ); 2523 2524 case Iop_Shl64x2: 2525 case Iop_Shr64x2: 2526 case Iop_Sar64x2: 2527 case Iop_Sal64x2: 2528 return mkUifUV128(mce, 2529 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2530 mkPCast64x2(mce,vatom2) 2531 ); 2532 2533 case Iop_F32ToFixed32Ux4_RZ: 2534 case Iop_F32ToFixed32Sx4_RZ: 2535 case Iop_Fixed32UToF32x4_RN: 2536 case Iop_Fixed32SToF32x4_RN: 2537 complainIfUndefined(mce, atom2); 2538 return mkPCast32x4(mce, vatom1); 2539 2540 case Iop_F32ToFixed32Ux2_RZ: 2541 case Iop_F32ToFixed32Sx2_RZ: 2542 case Iop_Fixed32UToF32x2_RN: 2543 case Iop_Fixed32SToF32x2_RN: 2544 complainIfUndefined(mce, atom2); 2545 return mkPCast32x2(mce, vatom1); 2546 2547 case Iop_QSub8Ux16: 2548 case Iop_QSub8Sx16: 2549 case Iop_Sub8x16: 2550 case Iop_Min8Ux16: 2551 case Iop_Min8Sx16: 2552 case Iop_Max8Ux16: 2553 case Iop_Max8Sx16: 2554 case Iop_CmpGT8Sx16: 2555 case Iop_CmpGT8Ux16: 2556 case Iop_CmpEQ8x16: 2557 case Iop_Avg8Ux16: 2558 case Iop_Avg8Sx16: 2559 case Iop_QAdd8Ux16: 2560 case Iop_QAdd8Sx16: 2561 case Iop_QSal8x16: 2562 case Iop_QShl8x16: 2563 case Iop_Add8x16: 2564 case Iop_Mul8x16: 2565 case Iop_PolynomialMul8x16: 2566 return binary8Ix16(mce, vatom1, vatom2); 2567 2568 case Iop_QSub16Ux8: 2569 case Iop_QSub16Sx8: 2570 case Iop_Sub16x8: 2571 case Iop_Mul16x8: 2572 case Iop_MulHi16Sx8: 2573 case Iop_MulHi16Ux8: 2574 case Iop_Min16Sx8: 2575 case Iop_Min16Ux8: 2576 case Iop_Max16Sx8: 2577 case Iop_Max16Ux8: 2578 case Iop_CmpGT16Sx8: 2579 case Iop_CmpGT16Ux8: 2580 case Iop_CmpEQ16x8: 2581 case Iop_Avg16Ux8: 2582 case Iop_Avg16Sx8: 2583 case Iop_QAdd16Ux8: 2584 case Iop_QAdd16Sx8: 2585 case Iop_QSal16x8: 2586 case Iop_QShl16x8: 2587 case Iop_Add16x8: 2588 case 
Iop_QDMulHi16Sx8: 2589 case Iop_QRDMulHi16Sx8: 2590 return binary16Ix8(mce, vatom1, vatom2); 2591 2592 case Iop_Sub32x4: 2593 case Iop_CmpGT32Sx4: 2594 case Iop_CmpGT32Ux4: 2595 case Iop_CmpEQ32x4: 2596 case Iop_QAdd32Sx4: 2597 case Iop_QAdd32Ux4: 2598 case Iop_QSub32Sx4: 2599 case Iop_QSub32Ux4: 2600 case Iop_QSal32x4: 2601 case Iop_QShl32x4: 2602 case Iop_Avg32Ux4: 2603 case Iop_Avg32Sx4: 2604 case Iop_Add32x4: 2605 case Iop_Max32Ux4: 2606 case Iop_Max32Sx4: 2607 case Iop_Min32Ux4: 2608 case Iop_Min32Sx4: 2609 case Iop_Mul32x4: 2610 case Iop_QDMulHi32Sx4: 2611 case Iop_QRDMulHi32Sx4: 2612 return binary32Ix4(mce, vatom1, vatom2); 2613 2614 case Iop_Sub64x2: 2615 case Iop_Add64x2: 2616 case Iop_CmpGT64Sx2: 2617 case Iop_QSal64x2: 2618 case Iop_QShl64x2: 2619 case Iop_QAdd64Ux2: 2620 case Iop_QAdd64Sx2: 2621 case Iop_QSub64Ux2: 2622 case Iop_QSub64Sx2: 2623 return binary64Ix2(mce, vatom1, vatom2); 2624 2625 case Iop_QNarrow32Sx4: 2626 case Iop_QNarrow32Ux4: 2627 case Iop_QNarrow16Sx8: 2628 case Iop_QNarrow16Ux8: 2629 return vectorNarrowV128(mce, op, vatom1, vatom2); 2630 2631 case Iop_Sub64Fx2: 2632 case Iop_Mul64Fx2: 2633 case Iop_Min64Fx2: 2634 case Iop_Max64Fx2: 2635 case Iop_Div64Fx2: 2636 case Iop_CmpLT64Fx2: 2637 case Iop_CmpLE64Fx2: 2638 case Iop_CmpEQ64Fx2: 2639 case Iop_CmpUN64Fx2: 2640 case Iop_Add64Fx2: 2641 return binary64Fx2(mce, vatom1, vatom2); 2642 2643 case Iop_Sub64F0x2: 2644 case Iop_Mul64F0x2: 2645 case Iop_Min64F0x2: 2646 case Iop_Max64F0x2: 2647 case Iop_Div64F0x2: 2648 case Iop_CmpLT64F0x2: 2649 case Iop_CmpLE64F0x2: 2650 case Iop_CmpEQ64F0x2: 2651 case Iop_CmpUN64F0x2: 2652 case Iop_Add64F0x2: 2653 return binary64F0x2(mce, vatom1, vatom2); 2654 2655 case Iop_Sub32Fx4: 2656 case Iop_Mul32Fx4: 2657 case Iop_Min32Fx4: 2658 case Iop_Max32Fx4: 2659 case Iop_Div32Fx4: 2660 case Iop_CmpLT32Fx4: 2661 case Iop_CmpLE32Fx4: 2662 case Iop_CmpEQ32Fx4: 2663 case Iop_CmpUN32Fx4: 2664 case Iop_CmpGT32Fx4: 2665 case Iop_CmpGE32Fx4: 2666 case Iop_Add32Fx4: 2667 case Iop_Recps32Fx4: 2668 case Iop_Rsqrts32Fx4: 2669 return binary32Fx4(mce, vatom1, vatom2); 2670 2671 case Iop_Sub32Fx2: 2672 case Iop_Mul32Fx2: 2673 case Iop_Min32Fx2: 2674 case Iop_Max32Fx2: 2675 case Iop_CmpEQ32Fx2: 2676 case Iop_CmpGT32Fx2: 2677 case Iop_CmpGE32Fx2: 2678 case Iop_Add32Fx2: 2679 case Iop_Recps32Fx2: 2680 case Iop_Rsqrts32Fx2: 2681 return binary32Fx2(mce, vatom1, vatom2); 2682 2683 case Iop_Sub32F0x4: 2684 case Iop_Mul32F0x4: 2685 case Iop_Min32F0x4: 2686 case Iop_Max32F0x4: 2687 case Iop_Div32F0x4: 2688 case Iop_CmpLT32F0x4: 2689 case Iop_CmpLE32F0x4: 2690 case Iop_CmpEQ32F0x4: 2691 case Iop_CmpUN32F0x4: 2692 case Iop_Add32F0x4: 2693 return binary32F0x4(mce, vatom1, vatom2); 2694 2695 case Iop_QShlN8Sx16: 2696 case Iop_QShlN8x16: 2697 case Iop_QSalN8x16: 2698 complainIfUndefined(mce, atom2); 2699 return mkPCast8x16(mce, vatom1); 2700 2701 case Iop_QShlN16Sx8: 2702 case Iop_QShlN16x8: 2703 case Iop_QSalN16x8: 2704 complainIfUndefined(mce, atom2); 2705 return mkPCast16x8(mce, vatom1); 2706 2707 case Iop_QShlN32Sx4: 2708 case Iop_QShlN32x4: 2709 case Iop_QSalN32x4: 2710 complainIfUndefined(mce, atom2); 2711 return mkPCast32x4(mce, vatom1); 2712 2713 case Iop_QShlN64Sx2: 2714 case Iop_QShlN64x2: 2715 case Iop_QSalN64x2: 2716 complainIfUndefined(mce, atom2); 2717 return mkPCast32x4(mce, vatom1); 2718 2719 case Iop_Mull32Sx2: 2720 case Iop_Mull32Ux2: 2721 case Iop_QDMulLong32Sx2: 2722 return vectorLongenI64(mce, Iop_Longen32Sx2, 2723 mkUifU64(mce, vatom1, vatom2)); 2724 2725 case Iop_Mull16Sx4: 2726 case 
Iop_Mull16Ux4: 2727 case Iop_QDMulLong16Sx4: 2728 return vectorLongenI64(mce, Iop_Longen16Sx4, 2729 mkUifU64(mce, vatom1, vatom2)); 2730 2731 case Iop_Mull8Sx8: 2732 case Iop_Mull8Ux8: 2733 case Iop_PolynomialMull8x8: 2734 return vectorLongenI64(mce, Iop_Longen8Sx8, 2735 mkUifU64(mce, vatom1, vatom2)); 2736 2737 case Iop_PwAdd32x4: 2738 return mkPCast32x4(mce, 2739 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 2740 mkPCast32x4(mce, vatom2)))); 2741 2742 case Iop_PwAdd16x8: 2743 return mkPCast16x8(mce, 2744 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 2745 mkPCast16x8(mce, vatom2)))); 2746 2747 case Iop_PwAdd8x16: 2748 return mkPCast8x16(mce, 2749 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 2750 mkPCast8x16(mce, vatom2)))); 2751 2752 /* V128-bit data-steering */ 2753 case Iop_SetV128lo32: 2754 case Iop_SetV128lo64: 2755 case Iop_64HLtoV128: 2756 case Iop_InterleaveLO64x2: 2757 case Iop_InterleaveLO32x4: 2758 case Iop_InterleaveLO16x8: 2759 case Iop_InterleaveLO8x16: 2760 case Iop_InterleaveHI64x2: 2761 case Iop_InterleaveHI32x4: 2762 case Iop_InterleaveHI16x8: 2763 case Iop_InterleaveHI8x16: 2764 case Iop_CatOddLanes8x16: 2765 case Iop_CatOddLanes16x8: 2766 case Iop_CatOddLanes32x4: 2767 case Iop_CatEvenLanes8x16: 2768 case Iop_CatEvenLanes16x8: 2769 case Iop_CatEvenLanes32x4: 2770 case Iop_InterleaveOddLanes8x16: 2771 case Iop_InterleaveOddLanes16x8: 2772 case Iop_InterleaveOddLanes32x4: 2773 case Iop_InterleaveEvenLanes8x16: 2774 case Iop_InterleaveEvenLanes16x8: 2775 case Iop_InterleaveEvenLanes32x4: 2776 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 2777 2778 case Iop_GetElem8x16: 2779 complainIfUndefined(mce, atom2); 2780 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2781 case Iop_GetElem16x8: 2782 complainIfUndefined(mce, atom2); 2783 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2784 case Iop_GetElem32x4: 2785 complainIfUndefined(mce, atom2); 2786 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2787 case Iop_GetElem64x2: 2788 complainIfUndefined(mce, atom2); 2789 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2790 2791 /* Perm8x16: rearrange values in left arg using steering values 2792 from right arg. So rearrange the vbits in the same way but 2793 pessimise wrt steering values. */ 2794 case Iop_Perm8x16: 2795 return mkUifUV128( 2796 mce, 2797 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2798 mkPCast8x16(mce, vatom2) 2799 ); 2800 2801 /* These two take the lower half of each 16-bit lane, sign/zero 2802 extend it to 32, and multiply together, producing a 32x4 2803 result (and implicitly ignoring half the operand bits). So 2804 treat it as a bunch of independent 16x8 operations, but then 2805 do 32-bit shifts left-right to copy the lower half results 2806 (which are all 0s or all 1s due to PCasting in binary16Ix8) 2807 into the upper half of each result lane. 
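   For instance (purely illustrative): after binary16Ix8 every 16-bit
   lane of the intermediate vbits is either 0x0000 or 0xFFFF.  If the
   low 16 bits of a 32-bit result lane are 0xFFFF, ShlN32x4 by 16
   gives 0xFFFF0000 and SarN32x4 by 16 then gives 0xFFFFFFFF; had the
   low half been 0x0000 the lane would end up 0x00000000.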
*/ 2808 case Iop_MullEven16Ux8: 2809 case Iop_MullEven16Sx8: { 2810 IRAtom* at; 2811 at = binary16Ix8(mce,vatom1,vatom2); 2812 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 2813 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 2814 return at; 2815 } 2816 2817 /* Same deal as Iop_MullEven16{S,U}x8 */ 2818 case Iop_MullEven8Ux16: 2819 case Iop_MullEven8Sx16: { 2820 IRAtom* at; 2821 at = binary8Ix16(mce,vatom1,vatom2); 2822 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 2823 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 2824 return at; 2825 } 2826 2827 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 2828 32x4 -> 16x8 laneage, discarding the upper half of each lane. 2829 Simply apply the same op to the V bits, since this is really no 2830 more than a data steering operation. */ 2831 case Iop_Narrow32x4: 2832 case Iop_Narrow16x8: 2833 return assignNew('V', mce, Ity_V128, 2834 binop(op, vatom1, vatom2)); 2835 2836 case Iop_ShrV128: 2837 case Iop_ShlV128: 2838 /* Same scheme as with all other shifts. Note: 10 Nov 05: 2839 this is wrong now, scalar shifts are done properly lazily. 2840 Vector shifts should be fixed too. */ 2841 complainIfUndefined(mce, atom2); 2842 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 2843 2844 /* I128-bit data-steering */ 2845 case Iop_64HLto128: 2846 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 2847 2848 /* Scalar floating point */ 2849 2850 case Iop_RoundF64toInt: 2851 case Iop_RoundF64toF32: 2852 case Iop_F64toI64S: 2853 case Iop_I64StoF64: 2854 case Iop_SinF64: 2855 case Iop_CosF64: 2856 case Iop_TanF64: 2857 case Iop_2xm1F64: 2858 case Iop_SqrtF64: 2859 /* I32(rm) x I64/F64 -> I64/F64 */ 2860 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 2861 2862 case Iop_RoundF32toInt: 2863 case Iop_SqrtF32: 2864 /* I32(rm) x I32/F32 -> I32/F32 */ 2865 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 2866 2867 case Iop_F64toI32U: 2868 case Iop_F64toI32S: 2869 case Iop_F64toF32: 2870 /* First arg is I32 (rounding mode), second is F64 (data). */ 2871 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 2872 2873 case Iop_F64toI16S: 2874 /* First arg is I32 (rounding mode), second is F64 (data).
*/ 2875 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 2876 2877 case Iop_CmpF64: 2878 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 2879 2880 /* non-FP after here */ 2881 2882 case Iop_DivModU64to32: 2883 case Iop_DivModS64to32: 2884 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 2885 2886 case Iop_DivModU128to64: 2887 case Iop_DivModS128to64: 2888 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 2889 2890 case Iop_16HLto32: 2891 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 2892 case Iop_32HLto64: 2893 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 2894 2895 case Iop_MullS64: 2896 case Iop_MullU64: { 2897 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 2898 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 2899 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64)); 2900 } 2901 2902 case Iop_MullS32: 2903 case Iop_MullU32: { 2904 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 2905 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 2906 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32)); 2907 } 2908 2909 case Iop_MullS16: 2910 case Iop_MullU16: { 2911 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 2912 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 2913 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16)); 2914 } 2915 2916 case Iop_MullS8: 2917 case Iop_MullU8: { 2918 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 2919 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 2920 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 2921 } 2922 2923 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */ 2924 case Iop_DivS32: 2925 case Iop_DivU32: 2926 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 2927 2928 case Iop_DivS64: 2929 case Iop_DivU64: 2930 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 2931 2932 case Iop_Add32: 2933 if (mce->bogusLiterals) 2934 return expensiveAddSub(mce,True,Ity_I32, 2935 vatom1,vatom2, atom1,atom2); 2936 else 2937 goto cheap_AddSub32; 2938 case Iop_Sub32: 2939 if (mce->bogusLiterals) 2940 return expensiveAddSub(mce,False,Ity_I32, 2941 vatom1,vatom2, atom1,atom2); 2942 else 2943 goto cheap_AddSub32; 2944 2945 cheap_AddSub32: 2946 case Iop_Mul32: 2947 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 2948 2949 case Iop_CmpORD32S: 2950 case Iop_CmpORD32U: 2951 case Iop_CmpORD64S: 2952 case Iop_CmpORD64U: 2953 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 2954 2955 case Iop_Add64: 2956 if (mce->bogusLiterals) 2957 return expensiveAddSub(mce,True,Ity_I64, 2958 vatom1,vatom2, atom1,atom2); 2959 else 2960 goto cheap_AddSub64; 2961 case Iop_Sub64: 2962 if (mce->bogusLiterals) 2963 return expensiveAddSub(mce,False,Ity_I64, 2964 vatom1,vatom2, atom1,atom2); 2965 else 2966 goto cheap_AddSub64; 2967 2968 cheap_AddSub64: 2969 case Iop_Mul64: 2970 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 2971 2972 case Iop_Mul16: 2973 case Iop_Add16: 2974 case Iop_Sub16: 2975 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 2976 2977 case Iop_Sub8: 2978 case Iop_Add8: 2979 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 2980 2981 case Iop_CmpEQ64: 2982 case Iop_CmpNE64: 2983 if (mce->bogusLiterals) 2984 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 2985 else 2986 goto cheap_cmp64; 2987 cheap_cmp64: 2988 case Iop_CmpLE64S: case Iop_CmpLE64U: 2989 case Iop_CmpLT64U: case Iop_CmpLT64S: 2990 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 2991 2992 case Iop_CmpEQ32: 2993 case 
Iop_CmpNE32: 2994 if (mce->bogusLiterals) 2995 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 2996 else 2997 goto cheap_cmp32; 2998 cheap_cmp32: 2999 case Iop_CmpLE32S: case Iop_CmpLE32U: 3000 case Iop_CmpLT32U: case Iop_CmpLT32S: 3001 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 3002 3003 case Iop_CmpEQ16: case Iop_CmpNE16: 3004 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 3005 3006 case Iop_CmpEQ8: case Iop_CmpNE8: 3007 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 3008 3009 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 3010 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 3011 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 3012 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 3013 /* Just say these all produce a defined result, regardless 3014 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */ 3015 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 3016 3017 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 3018 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 3019 3020 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 3021 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 3022 3023 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 3024 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 3025 3026 case Iop_Shl8: case Iop_Shr8: 3027 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 3028 3029 case Iop_AndV128: 3030 uifu = mkUifUV128; difd = mkDifDV128; 3031 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 3032 case Iop_And64: 3033 uifu = mkUifU64; difd = mkDifD64; 3034 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 3035 case Iop_And32: 3036 uifu = mkUifU32; difd = mkDifD32; 3037 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 3038 case Iop_And16: 3039 uifu = mkUifU16; difd = mkDifD16; 3040 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 3041 case Iop_And8: 3042 uifu = mkUifU8; difd = mkDifD8; 3043 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 3044 3045 case Iop_OrV128: 3046 uifu = mkUifUV128; difd = mkDifDV128; 3047 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 3048 case Iop_Or64: 3049 uifu = mkUifU64; difd = mkDifD64; 3050 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 3051 case Iop_Or32: 3052 uifu = mkUifU32; difd = mkDifD32; 3053 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 3054 case Iop_Or16: 3055 uifu = mkUifU16; difd = mkDifD16; 3056 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 3057 case Iop_Or8: 3058 uifu = mkUifU8; difd = mkDifD8; 3059 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 3060 3061 do_And_Or: 3062 return 3063 assignNew( 3064 'V', mce, 3065 and_or_ty, 3066 difd(mce, uifu(mce, vatom1, vatom2), 3067 difd(mce, improve(mce, atom1, vatom1), 3068 improve(mce, atom2, vatom2) ) ) ); 3069 3070 case Iop_Xor8: 3071 return mkUifU8(mce, vatom1, vatom2); 3072 case Iop_Xor16: 3073 return mkUifU16(mce, vatom1, vatom2); 3074 case Iop_Xor32: 3075 return mkUifU32(mce, vatom1, vatom2); 3076 case Iop_Xor64: 3077 return mkUifU64(mce, vatom1, vatom2); 3078 case Iop_XorV128: 3079 return mkUifUV128(mce, vatom1, vatom2); 3080 3081 default: 3082 ppIROp(op); 3083 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 3084 } 3085} 3086 3087 3088static 3089IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 3090{ 3091 IRAtom* vatom = expr2vbits( mce, atom ); 3092 tl_assert(isOriginalAtom(mce,atom)); 3093 switch (op) { 
3094 3095 case Iop_Sqrt64Fx2: 3096 return unary64Fx2(mce, vatom); 3097 3098 case Iop_Sqrt64F0x2: 3099 return unary64F0x2(mce, vatom); 3100 3101 case Iop_Sqrt32Fx4: 3102 case Iop_RSqrt32Fx4: 3103 case Iop_Recip32Fx4: 3104 case Iop_I32UtoFx4: 3105 case Iop_I32StoFx4: 3106 case Iop_QFtoI32Ux4_RZ: 3107 case Iop_QFtoI32Sx4_RZ: 3108 case Iop_RoundF32x4_RM: 3109 case Iop_RoundF32x4_RP: 3110 case Iop_RoundF32x4_RN: 3111 case Iop_RoundF32x4_RZ: 3112 case Iop_Recip32x4: 3113 case Iop_Abs32Fx4: 3114 case Iop_Neg32Fx4: 3115 case Iop_Rsqrte32Fx4: 3116 return unary32Fx4(mce, vatom); 3117 3118 case Iop_I32UtoFx2: 3119 case Iop_I32StoFx2: 3120 case Iop_Recip32Fx2: 3121 case Iop_Recip32x2: 3122 case Iop_Abs32Fx2: 3123 case Iop_Neg32Fx2: 3124 case Iop_Rsqrte32Fx2: 3125 return unary32Fx2(mce, vatom); 3126 3127 case Iop_Sqrt32F0x4: 3128 case Iop_RSqrt32F0x4: 3129 case Iop_Recip32F0x4: 3130 return unary32F0x4(mce, vatom); 3131 3132 case Iop_32UtoV128: 3133 case Iop_64UtoV128: 3134 case Iop_Dup8x16: 3135 case Iop_Dup16x8: 3136 case Iop_Dup32x4: 3137 case Iop_Reverse16_8x16: 3138 case Iop_Reverse32_8x16: 3139 case Iop_Reverse32_16x8: 3140 case Iop_Reverse64_8x16: 3141 case Iop_Reverse64_16x8: 3142 case Iop_Reverse64_32x4: 3143 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 3144 3145 case Iop_F32toF64: 3146 case Iop_I32StoF64: 3147 case Iop_I32UtoF64: 3148 case Iop_NegF64: 3149 case Iop_AbsF64: 3150 case Iop_Est5FRSqrt: 3151 case Iop_RoundF64toF64_NEAREST: 3152 case Iop_RoundF64toF64_NegINF: 3153 case Iop_RoundF64toF64_PosINF: 3154 case Iop_RoundF64toF64_ZERO: 3155 case Iop_Clz64: 3156 case Iop_Ctz64: 3157 return mkPCastTo(mce, Ity_I64, vatom); 3158 3159 case Iop_Clz32: 3160 case Iop_Ctz32: 3161 case Iop_TruncF64asF32: 3162 case Iop_NegF32: 3163 case Iop_AbsF32: 3164 return mkPCastTo(mce, Ity_I32, vatom); 3165 3166 case Iop_1Uto64: 3167 case Iop_8Uto64: 3168 case Iop_8Sto64: 3169 case Iop_16Uto64: 3170 case Iop_16Sto64: 3171 case Iop_32Sto64: 3172 case Iop_32Uto64: 3173 case Iop_V128to64: 3174 case Iop_V128HIto64: 3175 case Iop_128HIto64: 3176 case Iop_128to64: 3177 case Iop_Dup8x8: 3178 case Iop_Dup16x4: 3179 case Iop_Dup32x2: 3180 case Iop_Reverse16_8x8: 3181 case Iop_Reverse32_8x8: 3182 case Iop_Reverse32_16x4: 3183 case Iop_Reverse64_8x8: 3184 case Iop_Reverse64_16x4: 3185 case Iop_Reverse64_32x2: 3186 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 3187 3188 case Iop_64to32: 3189 case Iop_64HIto32: 3190 case Iop_1Uto32: 3191 case Iop_1Sto32: 3192 case Iop_8Uto32: 3193 case Iop_16Uto32: 3194 case Iop_16Sto32: 3195 case Iop_8Sto32: 3196 case Iop_V128to32: 3197 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 3198 3199 case Iop_8Sto16: 3200 case Iop_8Uto16: 3201 case Iop_32to16: 3202 case Iop_32HIto16: 3203 case Iop_64to16: 3204 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 3205 3206 case Iop_1Uto8: 3207 case Iop_16to8: 3208 case Iop_16HIto8: 3209 case Iop_32to8: 3210 case Iop_64to8: 3211 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 3212 3213 case Iop_32to1: 3214 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 3215 3216 case Iop_64to1: 3217 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 3218 3219 case Iop_ReinterpF64asI64: 3220 case Iop_ReinterpI64asF64: 3221 case Iop_ReinterpI32asF32: 3222 case Iop_ReinterpF32asI32: 3223 case Iop_NotV128: 3224 case Iop_Not64: 3225 case Iop_Not32: 3226 case Iop_Not16: 3227 case Iop_Not8: 3228 case Iop_Not1: 3229 return vatom; 3230 3231 case Iop_CmpNEZ8x8: 3232 case Iop_Cnt8x8: 3233 case Iop_Clz8Sx8: 3234 case 
Iop_Cls8Sx8: 3235 case Iop_Abs8x8: 3236 return mkPCast8x8(mce, vatom); 3237 3238 case Iop_CmpNEZ8x16: 3239 case Iop_Cnt8x16: 3240 case Iop_Clz8Sx16: 3241 case Iop_Cls8Sx16: 3242 case Iop_Abs8x16: 3243 return mkPCast8x16(mce, vatom); 3244 3245 case Iop_CmpNEZ16x4: 3246 case Iop_Clz16Sx4: 3247 case Iop_Cls16Sx4: 3248 case Iop_Abs16x4: 3249 return mkPCast16x4(mce, vatom); 3250 3251 case Iop_CmpNEZ16x8: 3252 case Iop_Clz16Sx8: 3253 case Iop_Cls16Sx8: 3254 case Iop_Abs16x8: 3255 return mkPCast16x8(mce, vatom); 3256 3257 case Iop_CmpNEZ32x2: 3258 case Iop_Clz32Sx2: 3259 case Iop_Cls32Sx2: 3260 case Iop_FtoI32Ux2_RZ: 3261 case Iop_FtoI32Sx2_RZ: 3262 case Iop_Abs32x2: 3263 return mkPCast32x2(mce, vatom); 3264 3265 case Iop_CmpNEZ32x4: 3266 case Iop_Clz32Sx4: 3267 case Iop_Cls32Sx4: 3268 case Iop_FtoI32Ux4_RZ: 3269 case Iop_FtoI32Sx4_RZ: 3270 case Iop_Abs32x4: 3271 return mkPCast32x4(mce, vatom); 3272 3273 case Iop_CmpwNEZ64: 3274 return mkPCastTo(mce, Ity_I64, vatom); 3275 3276 case Iop_CmpNEZ64x2: 3277 return mkPCast64x2(mce, vatom); 3278 3279 case Iop_Shorten16x8: 3280 case Iop_Shorten32x4: 3281 case Iop_Shorten64x2: 3282 case Iop_QShortenS16Sx8: 3283 case Iop_QShortenU16Sx8: 3284 case Iop_QShortenU16Ux8: 3285 case Iop_QShortenS32Sx4: 3286 case Iop_QShortenU32Sx4: 3287 case Iop_QShortenU32Ux4: 3288 case Iop_QShortenS64Sx2: 3289 case Iop_QShortenU64Sx2: 3290 case Iop_QShortenU64Ux2: 3291 return vectorShortenV128(mce, op, vatom); 3292 3293 case Iop_Longen8Sx8: 3294 case Iop_Longen8Ux8: 3295 case Iop_Longen16Sx4: 3296 case Iop_Longen16Ux4: 3297 case Iop_Longen32Sx2: 3298 case Iop_Longen32Ux2: 3299 return vectorLongenI64(mce, op, vatom); 3300 3301 case Iop_PwAddL32Ux2: 3302 case Iop_PwAddL32Sx2: 3303 return mkPCastTo(mce, Ity_I64, 3304 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 3305 3306 case Iop_PwAddL16Ux4: 3307 case Iop_PwAddL16Sx4: 3308 return mkPCast32x2(mce, 3309 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 3310 3311 case Iop_PwAddL8Ux8: 3312 case Iop_PwAddL8Sx8: 3313 return mkPCast16x4(mce, 3314 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 3315 3316 case Iop_PwAddL32Ux4: 3317 case Iop_PwAddL32Sx4: 3318 return mkPCast64x2(mce, 3319 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 3320 3321 case Iop_PwAddL16Ux8: 3322 case Iop_PwAddL16Sx8: 3323 return mkPCast32x4(mce, 3324 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 3325 3326 case Iop_PwAddL8Ux16: 3327 case Iop_PwAddL8Sx16: 3328 return mkPCast16x8(mce, 3329 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 3330 3331 default: 3332 ppIROp(op); 3333 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 3334 } 3335} 3336 3337 3338/* Worker function; do not call directly. */ 3339static 3340IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 3341 IREndness end, IRType ty, 3342 IRAtom* addr, UInt bias ) 3343{ 3344 void* helper; 3345 Char* hname; 3346 IRDirty* di; 3347 IRTemp datavbits; 3348 IRAtom* addrAct; 3349 3350 tl_assert(isOriginalAtom(mce,addr)); 3351 tl_assert(end == Iend_LE || end == Iend_BE); 3352 3353 /* First, emit a definedness test for the address. This also sets 3354 the address (shadow) to 'defined' following the test. */ 3355 complainIfUndefined( mce, addr ); 3356 3357 /* Now cook up a call to the relevant helper function, to read the 3358 data V bits from shadow memory. 
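   (As a rough illustration only, not the literal IR text: for a
   little-endian Ity_I32 load with zero bias, the dirty call emitted
   below amounts to

      t_vbits = DIRTY 1-regparm MC_(helperc_LOADV32le)(addr)

   where t_vbits is a fresh V-shadow temp of type Ity_I32.)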
*/ 3359 ty = shadowTypeV(ty); 3360 3361 if (end == Iend_LE) { 3362 switch (ty) { 3363 case Ity_I64: helper = &MC_(helperc_LOADV64le); 3364 hname = "MC_(helperc_LOADV64le)"; 3365 break; 3366 case Ity_I32: helper = &MC_(helperc_LOADV32le); 3367 hname = "MC_(helperc_LOADV32le)"; 3368 break; 3369 case Ity_I16: helper = &MC_(helperc_LOADV16le); 3370 hname = "MC_(helperc_LOADV16le)"; 3371 break; 3372 case Ity_I8: helper = &MC_(helperc_LOADV8); 3373 hname = "MC_(helperc_LOADV8)"; 3374 break; 3375 default: ppIRType(ty); 3376 VG_(tool_panic)("memcheck:do_shadow_Load(LE)"); 3377 } 3378 } else { 3379 switch (ty) { 3380 case Ity_I64: helper = &MC_(helperc_LOADV64be); 3381 hname = "MC_(helperc_LOADV64be)"; 3382 break; 3383 case Ity_I32: helper = &MC_(helperc_LOADV32be); 3384 hname = "MC_(helperc_LOADV32be)"; 3385 break; 3386 case Ity_I16: helper = &MC_(helperc_LOADV16be); 3387 hname = "MC_(helperc_LOADV16be)"; 3388 break; 3389 case Ity_I8: helper = &MC_(helperc_LOADV8); 3390 hname = "MC_(helperc_LOADV8)"; 3391 break; 3392 default: ppIRType(ty); 3393 VG_(tool_panic)("memcheck:do_shadow_Load(BE)"); 3394 } 3395 } 3396 3397 /* Generate the actual address into addrAct. */ 3398 if (bias == 0) { 3399 addrAct = addr; 3400 } else { 3401 IROp mkAdd; 3402 IRAtom* eBias; 3403 IRType tyAddr = mce->hWordTy; 3404 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3405 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3406 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3407 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 3408 } 3409 3410 /* We need to have a place to park the V bits we're just about to 3411 read. */ 3412 datavbits = newTemp(mce, ty, VSh); 3413 di = unsafeIRDirty_1_N( datavbits, 3414 1/*regparms*/, 3415 hname, VG_(fnptr_to_fnentry)( helper ), 3416 mkIRExprVec_1( addrAct )); 3417 setHelperAnns( mce, di ); 3418 stmt( 'V', mce, IRStmt_Dirty(di) ); 3419 3420 return mkexpr(datavbits); 3421} 3422 3423 3424static 3425IRAtom* expr2vbits_Load ( MCEnv* mce, 3426 IREndness end, IRType ty, 3427 IRAtom* addr, UInt bias ) 3428{ 3429 IRAtom *v64hi, *v64lo; 3430 tl_assert(end == Iend_LE || end == Iend_BE); 3431 switch (shadowTypeV(ty)) { 3432 case Ity_I8: 3433 case Ity_I16: 3434 case Ity_I32: 3435 case Ity_I64: 3436 return expr2vbits_Load_WRK(mce, end, ty, addr, bias); 3437 case Ity_V128: 3438 if (end == Iend_LE) { 3439 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias); 3440 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3441 } else { 3442 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias); 3443 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3444 } 3445 return assignNew( 'V', mce, 3446 Ity_V128, 3447 binop(Iop_64HLtoV128, v64hi, v64lo)); 3448 default: 3449 VG_(tool_panic)("expr2vbits_Load"); 3450 } 3451} 3452 3453 3454static 3455IRAtom* expr2vbits_Mux0X ( MCEnv* mce, 3456 IRAtom* cond, IRAtom* expr0, IRAtom* exprX ) 3457{ 3458 IRAtom *vbitsC, *vbits0, *vbitsX; 3459 IRType ty; 3460 /* Given Mux0X(cond,expr0,exprX), generate 3461 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#) 3462 That is, steer the V bits like the originals, but trash the 3463 result if the steering value is undefined. This gives 3464 lazy propagation. 
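   Concretely, for an Ity_I32 result this is
      UifU32( Mux0X(cond, expr0#, exprX#), PCast32(cond#) ):
   if cond# is anything other than all-defined, the PCast term is all
   1s and the whole result reads as undefined; if cond# is defined
   the PCast term is all 0s and drops out.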
*/ 3465 tl_assert(isOriginalAtom(mce, cond)); 3466 tl_assert(isOriginalAtom(mce, expr0)); 3467 tl_assert(isOriginalAtom(mce, exprX)); 3468 3469 vbitsC = expr2vbits(mce, cond); 3470 vbits0 = expr2vbits(mce, expr0); 3471 vbitsX = expr2vbits(mce, exprX); 3472 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 3473 3474 return 3475 mkUifU(mce, ty, assignNew('V', mce, ty, 3476 IRExpr_Mux0X(cond, vbits0, vbitsX)), 3477 mkPCastTo(mce, ty, vbitsC) ); 3478} 3479 3480/* --------- This is the main expression-handling function. --------- */ 3481 3482static 3483IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 3484{ 3485 switch (e->tag) { 3486 3487 case Iex_Get: 3488 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 3489 3490 case Iex_GetI: 3491 return shadow_GETI( mce, e->Iex.GetI.descr, 3492 e->Iex.GetI.ix, e->Iex.GetI.bias ); 3493 3494 case Iex_RdTmp: 3495 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 3496 3497 case Iex_Const: 3498 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 3499 3500 case Iex_Qop: 3501 return expr2vbits_Qop( 3502 mce, 3503 e->Iex.Qop.op, 3504 e->Iex.Qop.arg1, e->Iex.Qop.arg2, 3505 e->Iex.Qop.arg3, e->Iex.Qop.arg4 3506 ); 3507 3508 case Iex_Triop: 3509 return expr2vbits_Triop( 3510 mce, 3511 e->Iex.Triop.op, 3512 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3 3513 ); 3514 3515 case Iex_Binop: 3516 return expr2vbits_Binop( 3517 mce, 3518 e->Iex.Binop.op, 3519 e->Iex.Binop.arg1, e->Iex.Binop.arg2 3520 ); 3521 3522 case Iex_Unop: 3523 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 3524 3525 case Iex_Load: 3526 return expr2vbits_Load( mce, e->Iex.Load.end, 3527 e->Iex.Load.ty, 3528 e->Iex.Load.addr, 0/*addr bias*/ ); 3529 3530 case Iex_CCall: 3531 return mkLazyN( mce, e->Iex.CCall.args, 3532 e->Iex.CCall.retty, 3533 e->Iex.CCall.cee ); 3534 3535 case Iex_Mux0X: 3536 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0, 3537 e->Iex.Mux0X.exprX); 3538 3539 default: 3540 VG_(printf)("\n"); 3541 ppIRExpr(e); 3542 VG_(printf)("\n"); 3543 VG_(tool_panic)("memcheck: expr2vbits"); 3544 } 3545} 3546 3547/*------------------------------------------------------------*/ 3548/*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 3549/*------------------------------------------------------------*/ 3550 3551/* Widen a value to the host word size. */ 3552 3553static 3554IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 3555{ 3556 IRType ty, tyH; 3557 3558 /* vatom is vbits-value and as such can only have a shadow type. 
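   Note: on a 64-bit host an Ity_I8 vbits value is widened below as
   32Uto64(8Uto32(vatom)), i.e. plain zero-extension; the newly
   introduced bits are 0 and therefore read as 'defined'.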
*/ 3559 tl_assert(isShadowAtom(mce,vatom)); 3560 3561 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 3562 tyH = mce->hWordTy; 3563 3564 if (tyH == Ity_I32) { 3565 switch (ty) { 3566 case Ity_I32: 3567 return vatom; 3568 case Ity_I16: 3569 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 3570 case Ity_I8: 3571 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 3572 default: 3573 goto unhandled; 3574 } 3575 } else 3576 if (tyH == Ity_I64) { 3577 switch (ty) { 3578 case Ity_I32: 3579 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 3580 case Ity_I16: 3581 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 3582 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 3583 case Ity_I8: 3584 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 3585 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 3586 default: 3587 goto unhandled; 3588 } 3589 } else { 3590 goto unhandled; 3591 } 3592 unhandled: 3593 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 3594 VG_(tool_panic)("zwidenToHostWord"); 3595} 3596 3597 3598/* Generate a shadow store. addr is always the original address atom. 3599 You can pass in either originals or V-bits for the data atom, but 3600 obviously not both. guard :: Ity_I1 controls whether the store 3601 really happens; NULL means it unconditionally does. Note that 3602 guard itself is not checked for definedness; the caller of this 3603 function must do that if necessary. */ 3604 3605static 3606void do_shadow_Store ( MCEnv* mce, 3607 IREndness end, 3608 IRAtom* addr, UInt bias, 3609 IRAtom* data, IRAtom* vdata, 3610 IRAtom* guard ) 3611{ 3612 IROp mkAdd; 3613 IRType ty, tyAddr; 3614 void* helper = NULL; 3615 Char* hname = NULL; 3616 IRConst* c; 3617 3618 tyAddr = mce->hWordTy; 3619 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3620 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3621 tl_assert( end == Iend_LE || end == Iend_BE ); 3622 3623 if (data) { 3624 tl_assert(!vdata); 3625 tl_assert(isOriginalAtom(mce, data)); 3626 tl_assert(bias == 0); 3627 vdata = expr2vbits( mce, data ); 3628 } else { 3629 tl_assert(vdata); 3630 } 3631 3632 tl_assert(isOriginalAtom(mce,addr)); 3633 tl_assert(isShadowAtom(mce,vdata)); 3634 3635 if (guard) { 3636 tl_assert(isOriginalAtom(mce, guard)); 3637 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 3638 } 3639 3640 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 3641 3642 // If we're not doing undefined value checking, pretend that this value 3643 // is "all valid". That lets Vex's optimiser remove some of the V bit 3644 // shadow computation ops that precede it. 3645 if (MC_(clo_mc_level) == 1) { 3646 switch (ty) { 3647 case Ity_V128: // V128 weirdness 3648 c = IRConst_V128(V_BITS16_DEFINED); break; 3649 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 3650 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 3651 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 3652 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 3653 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 3654 } 3655 vdata = IRExpr_Const( c ); 3656 } 3657 3658 /* First, emit a definedness test for the address. This also sets 3659 the address (shadow) to 'defined' following the test. */ 3660 complainIfUndefined( mce, addr ); 3661 3662 /* Now decide which helper function to call to write the data V 3663 bits into shadow memory. 
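   (Illustrative only: for a little-endian Ity_I32 store the code
   below ends up emitting roughly

      DIRTY 2-regparm MC_(helperc_STOREV32le)(addrAct, zwidenToHostWord(vdata))

   while the Ity_I64 and Ity_V128 cases use 1-regparm calls, the
   latter as two 64-bit halves.)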
*/ 3664 if (end == Iend_LE) { 3665 switch (ty) { 3666 case Ity_V128: /* we'll use the helper twice */ 3667 case Ity_I64: helper = &MC_(helperc_STOREV64le); 3668 hname = "MC_(helperc_STOREV64le)"; 3669 break; 3670 case Ity_I32: helper = &MC_(helperc_STOREV32le); 3671 hname = "MC_(helperc_STOREV32le)"; 3672 break; 3673 case Ity_I16: helper = &MC_(helperc_STOREV16le); 3674 hname = "MC_(helperc_STOREV16le)"; 3675 break; 3676 case Ity_I8: helper = &MC_(helperc_STOREV8); 3677 hname = "MC_(helperc_STOREV8)"; 3678 break; 3679 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 3680 } 3681 } else { 3682 switch (ty) { 3683 case Ity_V128: /* we'll use the helper twice */ 3684 case Ity_I64: helper = &MC_(helperc_STOREV64be); 3685 hname = "MC_(helperc_STOREV64be)"; 3686 break; 3687 case Ity_I32: helper = &MC_(helperc_STOREV32be); 3688 hname = "MC_(helperc_STOREV32be)"; 3689 break; 3690 case Ity_I16: helper = &MC_(helperc_STOREV16be); 3691 hname = "MC_(helperc_STOREV16be)"; 3692 break; 3693 case Ity_I8: helper = &MC_(helperc_STOREV8); 3694 hname = "MC_(helperc_STOREV8)"; 3695 break; 3696 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 3697 } 3698 } 3699 3700 if (ty == Ity_V128) { 3701 3702 /* V128-bit case */ 3703 /* See comment in next clause re 64-bit regparms */ 3704 /* also, need to be careful about endianness */ 3705 3706 Int offLo64, offHi64; 3707 IRDirty *diLo64, *diHi64; 3708 IRAtom *addrLo64, *addrHi64; 3709 IRAtom *vdataLo64, *vdataHi64; 3710 IRAtom *eBiasLo64, *eBiasHi64; 3711 3712 if (end == Iend_LE) { 3713 offLo64 = 0; 3714 offHi64 = 8; 3715 } else { 3716 offLo64 = 8; 3717 offHi64 = 0; 3718 } 3719 3720 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 3721 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 3722 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 3723 diLo64 = unsafeIRDirty_0_N( 3724 1/*regparms*/, 3725 hname, VG_(fnptr_to_fnentry)( helper ), 3726 mkIRExprVec_2( addrLo64, vdataLo64 ) 3727 ); 3728 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 3729 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 3730 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 3731 diHi64 = unsafeIRDirty_0_N( 3732 1/*regparms*/, 3733 hname, VG_(fnptr_to_fnentry)( helper ), 3734 mkIRExprVec_2( addrHi64, vdataHi64 ) 3735 ); 3736 if (guard) diLo64->guard = guard; 3737 if (guard) diHi64->guard = guard; 3738 setHelperAnns( mce, diLo64 ); 3739 setHelperAnns( mce, diHi64 ); 3740 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 3741 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 3742 3743 } else { 3744 3745 IRDirty *di; 3746 IRAtom *addrAct; 3747 3748 /* 8/16/32/64-bit cases */ 3749 /* Generate the actual address into addrAct. */ 3750 if (bias == 0) { 3751 addrAct = addr; 3752 } else { 3753 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3754 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 3755 } 3756 3757 if (ty == Ity_I64) { 3758 /* We can't do this with regparm 2 on 32-bit platforms, since 3759 the back ends aren't clever enough to handle 64-bit 3760 regparm args. Therefore be different. 
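   (Hence the 1-regparm call just below for Ity_I64 data, versus the
   2-regparm call used for the 8/16/32-bit cases.)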
*/ 3761 di = unsafeIRDirty_0_N( 3762 1/*regparms*/, 3763 hname, VG_(fnptr_to_fnentry)( helper ), 3764 mkIRExprVec_2( addrAct, vdata ) 3765 ); 3766 } else { 3767 di = unsafeIRDirty_0_N( 3768 2/*regparms*/, 3769 hname, VG_(fnptr_to_fnentry)( helper ), 3770 mkIRExprVec_2( addrAct, 3771 zwidenToHostWord( mce, vdata )) 3772 ); 3773 } 3774 if (guard) di->guard = guard; 3775 setHelperAnns( mce, di ); 3776 stmt( 'V', mce, IRStmt_Dirty(di) ); 3777 } 3778 3779} 3780 3781 3782/* Do lazy pessimistic propagation through a dirty helper call, by 3783 looking at the annotations on it. This is the most complex part of 3784 Memcheck. */ 3785 3786static IRType szToITy ( Int n ) 3787{ 3788 switch (n) { 3789 case 1: return Ity_I8; 3790 case 2: return Ity_I16; 3791 case 4: return Ity_I32; 3792 case 8: return Ity_I64; 3793 default: VG_(tool_panic)("szToITy(memcheck)"); 3794 } 3795} 3796 3797static 3798void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 3799{ 3800 Int i, n, toDo, gSz, gOff; 3801 IRAtom *src, *here, *curr; 3802 IRType tySrc, tyDst; 3803 IRTemp dst; 3804 IREndness end; 3805 3806 /* What's the native endianness? We need to know this. */ 3807# if defined(VG_BIGENDIAN) 3808 end = Iend_BE; 3809# elif defined(VG_LITTLEENDIAN) 3810 end = Iend_LE; 3811# else 3812# error "Unknown endianness" 3813# endif 3814 3815 /* First check the guard. */ 3816 complainIfUndefined(mce, d->guard); 3817 3818 /* Now round up all inputs and PCast over them. */ 3819 curr = definedOfType(Ity_I32); 3820 3821 /* Inputs: unmasked args */ 3822 for (i = 0; d->args[i]; i++) { 3823 if (d->cee->mcx_mask & (1<<i)) { 3824 /* ignore this arg */ 3825 } else { 3826 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) ); 3827 curr = mkUifU32(mce, here, curr); 3828 } 3829 } 3830 3831 /* Inputs: guest state that we read. */ 3832 for (i = 0; i < d->nFxState; i++) { 3833 tl_assert(d->fxState[i].fx != Ifx_None); 3834 if (d->fxState[i].fx == Ifx_Write) 3835 continue; 3836 3837 /* Ignore any sections marked as 'always defined'. */ 3838 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) { 3839 if (0) 3840 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 3841 d->fxState[i].offset, d->fxState[i].size ); 3842 continue; 3843 } 3844 3845 /* This state element is read or modified. So we need to 3846 consider it. If larger than 8 bytes, deal with it in 8-byte 3847 chunks. */ 3848 gSz = d->fxState[i].size; 3849 gOff = d->fxState[i].offset; 3850 tl_assert(gSz > 0); 3851 while (True) { 3852 if (gSz == 0) break; 3853 n = gSz <= 8 ? gSz : 8; 3854 /* update 'curr' with UifU of the state slice 3855 gOff .. gOff+n-1 */ 3856 tySrc = szToITy( n ); 3857 src = assignNew( 'V', mce, tySrc, 3858 shadow_GET(mce, gOff, tySrc ) ); 3859 here = mkPCastTo( mce, Ity_I32, src ); 3860 curr = mkUifU32(mce, here, curr); 3861 gSz -= n; 3862 gOff += n; 3863 } 3864 3865 } 3866 3867 /* Inputs: memory. First set up some info needed regardless of 3868 whether we're doing reads or writes. */ 3869 3870 if (d->mFx != Ifx_None) { 3871 /* Because we may do multiple shadow loads/stores from the same 3872 base address, it's best to do a single test of its 3873 definedness right now. Post-instrumentation optimisation 3874 should remove all but this test. 
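         As a worked example of the chunking loops below: for
         d->mSize == 10, the read loop emits 32-bit shadow loads at
         offsets 0 and 4 and then a 16-bit shadow load at offset 8,
         each PCast-ed to Ity_I32 and UifU'd into 'curr'.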
*/ 3875 IRType tyAddr; 3876 tl_assert(d->mAddr); 3877 complainIfUndefined(mce, d->mAddr); 3878 3879 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 3880 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 3881 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 3882 } 3883 3884 /* Deal with memory inputs (reads or modifies) */ 3885 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 3886 toDo = d->mSize; 3887 /* chew off 32-bit chunks. We don't care about the endianness 3888 since it's all going to be condensed down to a single bit, 3889 but nevertheless choose an endianness which is hopefully 3890 native to the platform. */ 3891 while (toDo >= 4) { 3892 here = mkPCastTo( 3893 mce, Ity_I32, 3894 expr2vbits_Load ( mce, end, Ity_I32, 3895 d->mAddr, d->mSize - toDo ) 3896 ); 3897 curr = mkUifU32(mce, here, curr); 3898 toDo -= 4; 3899 } 3900 /* chew off 16-bit chunks */ 3901 while (toDo >= 2) { 3902 here = mkPCastTo( 3903 mce, Ity_I32, 3904 expr2vbits_Load ( mce, end, Ity_I16, 3905 d->mAddr, d->mSize - toDo ) 3906 ); 3907 curr = mkUifU32(mce, here, curr); 3908 toDo -= 2; 3909 } 3910 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 3911 } 3912 3913 /* Whew! So curr is a 32-bit V-value summarising pessimistically 3914 all the inputs to the helper. Now we need to re-distribute the 3915 results to all destinations. */ 3916 3917 /* Outputs: the destination temporary, if there is one. */ 3918 if (d->tmp != IRTemp_INVALID) { 3919 dst = findShadowTmpV(mce, d->tmp); 3920 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 3921 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 3922 } 3923 3924 /* Outputs: guest state that we write or modify. */ 3925 for (i = 0; i < d->nFxState; i++) { 3926 tl_assert(d->fxState[i].fx != Ifx_None); 3927 if (d->fxState[i].fx == Ifx_Read) 3928 continue; 3929 /* Ignore any sections marked as 'always defined'. */ 3930 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) 3931 continue; 3932 /* This state element is written or modified. So we need to 3933 consider it. If larger than 8 bytes, deal with it in 8-byte 3934 chunks. */ 3935 gSz = d->fxState[i].size; 3936 gOff = d->fxState[i].offset; 3937 tl_assert(gSz > 0); 3938 while (True) { 3939 if (gSz == 0) break; 3940 n = gSz <= 8 ? gSz : 8; 3941 /* Write suitably-casted 'curr' to the state slice 3942 gOff .. gOff+n-1 */ 3943 tyDst = szToITy( n ); 3944 do_shadow_PUT( mce, gOff, 3945 NULL, /* original atom */ 3946 mkPCastTo( mce, tyDst, curr ) ); 3947 gSz -= n; 3948 gOff += n; 3949 } 3950 } 3951 3952 /* Outputs: memory that we write or modify. Same comments about 3953 endianness as above apply. */ 3954 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 3955 toDo = d->mSize; 3956 /* chew off 32-bit chunks */ 3957 while (toDo >= 4) { 3958 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 3959 NULL, /* original data */ 3960 mkPCastTo( mce, Ity_I32, curr ), 3961 NULL/*guard*/ ); 3962 toDo -= 4; 3963 } 3964 /* chew off 16-bit chunks */ 3965 while (toDo >= 2) { 3966 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 3967 NULL, /* original data */ 3968 mkPCastTo( mce, Ity_I16, curr ), 3969 NULL/*guard*/ ); 3970 toDo -= 2; 3971 } 3972 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 3973 } 3974 3975} 3976 3977 3978/* We have an ABI hint telling us that [base .. base+len-1] is to 3979 become undefined ("writable"). Generate code to call a helper to 3980 notify the A/V bit machinery of this fact. 
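   (Such hints typically come from the guest's stack pointer moving,
   eg at call/return boundaries; the bytes in the range no longer hold
   meaningful data, so their old V bits should be thrown away.)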
3981 3982 We call 3983 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 3984 Addr nia ); 3985*/ 3986static 3987void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 3988{ 3989 IRDirty* di; 3990 /* Minor optimisation: if not doing origin tracking, ignore the 3991 supplied nia and pass zero instead. This is on the basis that 3992 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can 3993 almost always generate a shorter instruction to put zero into a 3994 register than any other value. */ 3995 if (MC_(clo_mc_level) < 3) 3996 nia = mkIRExpr_HWord(0); 3997 3998 di = unsafeIRDirty_0_N( 3999 0/*regparms*/, 4000 "MC_(helperc_MAKE_STACK_UNINIT)", 4001 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ), 4002 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 4003 ); 4004 stmt( 'V', mce, IRStmt_Dirty(di) ); 4005} 4006 4007 4008/* ------ Dealing with IRCAS (big and complex) ------ */ 4009 4010/* FWDS */ 4011static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 4012 IRAtom* baseaddr, Int offset ); 4013static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 4014static void gen_store_b ( MCEnv* mce, Int szB, 4015 IRAtom* baseaddr, Int offset, IRAtom* dataB, 4016 IRAtom* guard ); 4017 4018static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 4019static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 4020 4021 4022/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 4023 IRExpr.Consts, else this asserts. If they are both Consts, it 4024 doesn't do anything. So that just leaves the RdTmp case. 4025 4026 In which case: this assigns the shadow value SHADOW to the IR 4027 shadow temporary associated with ORIG. That is, ORIG, being an 4028 original temporary, will have a shadow temporary associated with 4029 it. However, in the case envisaged here, there will so far have 4030 been no IR emitted to actually write a shadow value into that 4031 temporary. What this routine does is to (emit IR to) copy the 4032 value in SHADOW into said temporary, so that after this call, 4033 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 4034 value in SHADOW. 4035 4036 Point is to allow callers to compute "by hand" a shadow value for 4037 ORIG, and force it to be associated with ORIG. 4038 4039 How do we know that that shadow associated with ORIG has not so far 4040 been assigned to? Well, we don't per se know that, but supposing 4041 it had. Then this routine would create a second assignment to it, 4042 and later the IR sanity checker would barf. But that never 4043 happens. QED. 4044*/ 4045static void bind_shadow_tmp_to_orig ( UChar how, 4046 MCEnv* mce, 4047 IRAtom* orig, IRAtom* shadow ) 4048{ 4049 tl_assert(isOriginalAtom(mce, orig)); 4050 tl_assert(isShadowAtom(mce, shadow)); 4051 switch (orig->tag) { 4052 case Iex_Const: 4053 tl_assert(shadow->tag == Iex_Const); 4054 break; 4055 case Iex_RdTmp: 4056 tl_assert(shadow->tag == Iex_RdTmp); 4057 if (how == 'V') { 4058 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 4059 shadow); 4060 } else { 4061 tl_assert(how == 'B'); 4062 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 4063 shadow); 4064 } 4065 break; 4066 default: 4067 tl_assert(0); 4068 } 4069} 4070 4071 4072static 4073void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 4074{ 4075 /* Scheme is (both single- and double- cases): 4076 4077 1. fetch data#,dataB (the proposed new value) 4078 4079 2. fetch expd#,expdB (what we expect to see at the address) 4080 4081 3. check definedness of address 4082 4083 4. 
load old#,oldB from shadow memory; this also checks 4084 addressibility of the address 4085 4086 5. the CAS itself 4087 4088 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below. 4089 4090 7. if "expected == old" (as computed by (6)) 4091 store data#,dataB to shadow memory 4092 4093 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores 4094 'data' but 7 stores 'data#'. Hence it is possible for the 4095 shadow data to be incorrectly checked and/or updated: 4096 4097 * 7 is at least gated correctly, since the 'expected == old' 4098 condition is derived from outputs of 5. However, the shadow 4099 write could happen too late: imagine after 5 we are 4100 descheduled, a different thread runs, writes a different 4101 (shadow) value at the address, and then we resume, hence 4102 overwriting the shadow value written by the other thread. 4103 4104 Because the original memory access is atomic, there's no way to 4105 make both the original and shadow accesses into a single atomic 4106 thing, hence this is unavoidable. 4107 4108 At least as Valgrind stands, I don't think it's a problem, since 4109 we're single threaded *and* we guarantee that there are no 4110 context switches during the execution of any specific superblock 4111 -- context switches can only happen at superblock boundaries. 4112 4113 If Valgrind ever becomes MT in the future, then it might be more 4114 of a problem. A possible kludge would be to artificially 4115 associate a lock with the location, which we must acquire and 4116 release around the transaction as a whole. Hmm, that probably 4117 wouldn't work properly since it only guards us against other 4118 threads doing CASs on the same location, not against other 4119 threads doing normal reads and writes. 4120 4121 ------------------------------------------------------------ 4122 4123 COMMENT_ON_CasCmpEQ: 4124 4125 Note two things. Firstly, in the sequence above, we compute 4126 "expected == old", but we don't check definedness of it. Why 4127 not? Also, the x86 and amd64 front ends use 4128 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent 4129 determination (expected == old ?) for themselves, and we also 4130 don't check definedness for those primops; we just say that the 4131 result is defined. Why? Details follow. 4132 4133 x86/amd64 contains various forms of locked insns: 4134 * lock prefix before all basic arithmetic insns; 4135 eg lock xorl %reg1,(%reg2) 4136 * atomic exchange reg-mem 4137 * compare-and-swaps 4138 4139 Rather than attempt to represent them all, which would be a 4140 royal PITA, I used a result from Maurice Herlihy 4141 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 4142 demonstrates that compare-and-swap is a primitive more general 4143 than the other two, and so can be used to represent all of them. 4144 So the translation scheme for (eg) lock incl (%reg) is as 4145 follows: 4146 4147 again: 4148 old = * %reg 4149 new = old + 1 4150 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 4151 4152 The "atomically" is the CAS bit. The scheme is always the same: 4153 get old value from memory, compute new value, atomically stuff 4154 new value back in memory iff the old value has not changed (iow, 4155 no other thread modified it in the meantime). If it has changed 4156 then we've been out-raced and we have to start over. 4157 4158 Now that's all very neat, but it has the bad side effect of 4159 introducing an explicit equality test into the translation. 
4160 Consider the behaviour of said code on a memory location which 4161 is uninitialised. We will wind up doing a comparison on 4162 uninitialised data, and mc duly complains. 4163 4164 What's difficult about this is, the common case is that the 4165 location is uncontended, and so we're usually comparing the same 4166 value (* %reg) with itself. So we shouldn't complain even if it 4167 is undefined. But mc doesn't know that. 4168 4169 My solution is to mark the == in the IR specially, so as to tell 4170 mc that it almost certainly compares a value with itself, and we 4171 should just regard the result as always defined. Rather than 4172 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 4173 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 4174 4175 So there's always the question of, can this give a false 4176 negative? eg, imagine that initially, * %reg is defined; and we 4177 read that; but then in the gap between the read and the CAS, a 4178 different thread writes an undefined (and different) value at 4179 the location. Then the CAS in this thread will fail and we will 4180 go back to "again:", but without knowing that the trip back 4181 there was based on an undefined comparison. No matter; at least 4182 the other thread won the race and the location is correctly 4183 marked as undefined. What if it wrote an uninitialised version 4184 of the same value that was there originally, though? 4185 4186 etc etc. Seems like there's a small corner case in which we 4187 might lose the fact that something's defined -- we're out-raced 4188 in between the "old = * reg" and the "atomically {", _and_ the 4189 other thread is writing in an undefined version of what's 4190 already there. Well, that seems pretty unlikely. 4191 4192 --- 4193 4194 If we ever need to reinstate it .. code which generates a 4195 definedness test for "expected == old" was removed at r10432 of 4196 this file. 4197 */ 4198 if (cas->oldHi == IRTemp_INVALID) { 4199 do_shadow_CAS_single( mce, cas ); 4200 } else { 4201 do_shadow_CAS_double( mce, cas ); 4202 } 4203} 4204 4205 4206static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 4207{ 4208 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4209 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4210 IRAtom *voldLo = NULL, *boldLo = NULL; 4211 IRAtom *expd_eq_old = NULL; 4212 IROp opCasCmpEQ; 4213 Int elemSzB; 4214 IRType elemTy; 4215 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4216 4217 /* single CAS */ 4218 tl_assert(cas->oldHi == IRTemp_INVALID); 4219 tl_assert(cas->expdHi == NULL); 4220 tl_assert(cas->dataHi == NULL); 4221 4222 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4223 switch (elemTy) { 4224 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 4225 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 4226 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 4227 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 4228 default: tl_assert(0); /* IR defn disallows any other types */ 4229 } 4230 4231 /* 1. fetch data# (the proposed new value) */ 4232 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4233 vdataLo 4234 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4235 tl_assert(isShadowAtom(mce, vdataLo)); 4236 if (otrak) { 4237 bdataLo 4238 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4239 tl_assert(isShadowAtom(mce, bdataLo)); 4240 } 4241 4242 /* 2. 
fetch expected# (what we expect to see at the address) */ 4243 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4244 vexpdLo 4245 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4246 tl_assert(isShadowAtom(mce, vexpdLo)); 4247 if (otrak) { 4248 bexpdLo 4249 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4250 tl_assert(isShadowAtom(mce, bexpdLo)); 4251 } 4252 4253 /* 3. check definedness of address */ 4254 /* 4. fetch old# from shadow memory; this also checks 4255 addressibility of the address */ 4256 voldLo 4257 = assignNew( 4258 'V', mce, elemTy, 4259 expr2vbits_Load( 4260 mce, 4261 cas->end, elemTy, cas->addr, 0/*Addr bias*/ 4262 )); 4263 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4264 if (otrak) { 4265 boldLo 4266 = assignNew('B', mce, Ity_I32, 4267 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 4268 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 4269 } 4270 4271 /* 5. the CAS itself */ 4272 stmt( 'C', mce, IRStmt_CAS(cas) ); 4273 4274 /* 6. compute "expected == old" */ 4275 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 4276 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 4277 tree, but it's not copied from the input block. */ 4278 expd_eq_old 4279 = assignNew('C', mce, Ity_I1, 4280 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 4281 4282 /* 7. if "expected == old" 4283 store data# to shadow memory */ 4284 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 4285 NULL/*data*/, vdataLo/*vdata*/, 4286 expd_eq_old/*guard for store*/ ); 4287 if (otrak) { 4288 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 4289 bdataLo/*bdata*/, 4290 expd_eq_old/*guard for store*/ ); 4291 } 4292} 4293 4294 4295static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 4296{ 4297 IRAtom *vdataHi = NULL, *bdataHi = NULL; 4298 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4299 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 4300 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4301 IRAtom *voldHi = NULL, *boldHi = NULL; 4302 IRAtom *voldLo = NULL, *boldLo = NULL; 4303 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 4304 IRAtom *expd_eq_old = NULL, *zero = NULL; 4305 IROp opCasCmpEQ, opOr, opXor; 4306 Int elemSzB, memOffsLo, memOffsHi; 4307 IRType elemTy; 4308 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4309 4310 /* double CAS */ 4311 tl_assert(cas->oldHi != IRTemp_INVALID); 4312 tl_assert(cas->expdHi != NULL); 4313 tl_assert(cas->dataHi != NULL); 4314 4315 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4316 switch (elemTy) { 4317 case Ity_I8: 4318 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 4319 elemSzB = 1; zero = mkU8(0); 4320 break; 4321 case Ity_I16: 4322 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 4323 elemSzB = 2; zero = mkU16(0); 4324 break; 4325 case Ity_I32: 4326 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 4327 elemSzB = 4; zero = mkU32(0); 4328 break; 4329 case Ity_I64: 4330 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 4331 elemSzB = 8; zero = mkU64(0); 4332 break; 4333 default: 4334 tl_assert(0); /* IR defn disallows any other types */ 4335 } 4336 4337 /* 1. 
fetch data# (the proposed new value) */ 4338 tl_assert(isOriginalAtom(mce, cas->dataHi)); 4339 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4340 vdataHi 4341 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 4342 vdataLo 4343 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4344 tl_assert(isShadowAtom(mce, vdataHi)); 4345 tl_assert(isShadowAtom(mce, vdataLo)); 4346 if (otrak) { 4347 bdataHi 4348 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 4349 bdataLo 4350 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4351 tl_assert(isShadowAtom(mce, bdataHi)); 4352 tl_assert(isShadowAtom(mce, bdataLo)); 4353 } 4354 4355 /* 2. fetch expected# (what we expect to see at the address) */ 4356 tl_assert(isOriginalAtom(mce, cas->expdHi)); 4357 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4358 vexpdHi 4359 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 4360 vexpdLo 4361 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4362 tl_assert(isShadowAtom(mce, vexpdHi)); 4363 tl_assert(isShadowAtom(mce, vexpdLo)); 4364 if (otrak) { 4365 bexpdHi 4366 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 4367 bexpdLo 4368 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4369 tl_assert(isShadowAtom(mce, bexpdHi)); 4370 tl_assert(isShadowAtom(mce, bexpdLo)); 4371 } 4372 4373 /* 3. check definedness of address */ 4374 /* 4. fetch old# from shadow memory; this also checks 4375 addressibility of the address */ 4376 if (cas->end == Iend_LE) { 4377 memOffsLo = 0; 4378 memOffsHi = elemSzB; 4379 } else { 4380 tl_assert(cas->end == Iend_BE); 4381 memOffsLo = elemSzB; 4382 memOffsHi = 0; 4383 } 4384 voldHi 4385 = assignNew( 4386 'V', mce, elemTy, 4387 expr2vbits_Load( 4388 mce, 4389 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/ 4390 )); 4391 voldLo 4392 = assignNew( 4393 'V', mce, elemTy, 4394 expr2vbits_Load( 4395 mce, 4396 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/ 4397 )); 4398 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 4399 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4400 if (otrak) { 4401 boldHi 4402 = assignNew('B', mce, Ity_I32, 4403 gen_load_b(mce, elemSzB, cas->addr, 4404 memOffsHi/*addr bias*/)); 4405 boldLo 4406 = assignNew('B', mce, Ity_I32, 4407 gen_load_b(mce, elemSzB, cas->addr, 4408 memOffsLo/*addr bias*/)); 4409 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 4410 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 4411 } 4412 4413 /* 5. the CAS itself */ 4414 stmt( 'C', mce, IRStmt_CAS(cas) ); 4415 4416 /* 6. compute "expected == old" */ 4417 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 4418 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 4419 tree, but it's not copied from the input block. */ 4420 /* 4421 xHi = oldHi ^ expdHi; 4422 xLo = oldLo ^ expdLo; 4423 xHL = xHi | xLo; 4424 expd_eq_old = xHL == 0; 4425 */ 4426 xHi = assignNew('C', mce, elemTy, 4427 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 4428 xLo = assignNew('C', mce, elemTy, 4429 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 4430 xHL = assignNew('C', mce, elemTy, 4431 binop(opOr, xHi, xLo)); 4432 expd_eq_old 4433 = assignNew('C', mce, Ity_I1, 4434 binop(opCasCmpEQ, xHL, zero)); 4435 4436 /* 7. 
if "expected == old" 4437 store data# to shadow memory */ 4438 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 4439 NULL/*data*/, vdataHi/*vdata*/, 4440 expd_eq_old/*guard for store*/ ); 4441 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 4442 NULL/*data*/, vdataLo/*vdata*/, 4443 expd_eq_old/*guard for store*/ ); 4444 if (otrak) { 4445 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 4446 bdataHi/*bdata*/, 4447 expd_eq_old/*guard for store*/ ); 4448 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 4449 bdataLo/*bdata*/, 4450 expd_eq_old/*guard for store*/ ); 4451 } 4452} 4453 4454 4455/* ------ Dealing with LL/SC (not difficult) ------ */ 4456 4457static void do_shadow_LLSC ( MCEnv* mce, 4458 IREndness stEnd, 4459 IRTemp stResult, 4460 IRExpr* stAddr, 4461 IRExpr* stStoredata ) 4462{ 4463 /* In short: treat a load-linked like a normal load followed by an 4464 assignment of the loaded (shadow) data to the result temporary. 4465 Treat a store-conditional like a normal store, and mark the 4466 result temporary as defined. */ 4467 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 4468 IRTemp resTmp = findShadowTmpV(mce, stResult); 4469 4470 tl_assert(isIRAtom(stAddr)); 4471 if (stStoredata) 4472 tl_assert(isIRAtom(stStoredata)); 4473 4474 if (stStoredata == NULL) { 4475 /* Load Linked */ 4476 /* Just treat this as a normal load, followed by an assignment of 4477 the value to .result. */ 4478 /* Stay sane */ 4479 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 4480 || resTy == Ity_I16 || resTy == Ity_I8); 4481 assign( 'V', mce, resTmp, 4482 expr2vbits_Load( 4483 mce, stEnd, resTy, stAddr, 0/*addr bias*/)); 4484 } else { 4485 /* Store Conditional */ 4486 /* Stay sane */ 4487 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 4488 stStoredata); 4489 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 4490 || dataTy == Ity_I16 || dataTy == Ity_I8); 4491 do_shadow_Store( mce, stEnd, 4492 stAddr, 0/* addr bias */, 4493 stStoredata, 4494 NULL /* shadow data */, 4495 NULL/*guard*/ ); 4496 /* This is a store conditional, so it writes to .result a value 4497 indicating whether or not the store succeeded. Just claim 4498 this value is always defined. In the PowerPC interpretation 4499 of store-conditional, definedness of the success indication 4500 depends on whether the address of the store matches the 4501 reservation address. But we can't tell that here (and 4502 anyway, we're not being PowerPC-specific). At least we are 4503 guaranteed that the definedness of the store address, and its 4504 addressibility, will be checked as per normal. So it seems 4505 pretty safe to just say that the success indication is always 4506 defined. 4507 4508 In schemeS, for origin tracking, we must correspondingly set 4509 a no-origin value for the origin shadow of .result. 
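         Net effect of this function, roughly (V-bit view, assuming a
         32-bit LL/SC pair):
            LL:  .result's V shadow := shadow load of [addr]
            SC:  shadow store of storedata's V bits to [addr];
                 .result's V shadow := 'defined'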
4510 */ 4511 tl_assert(resTy == Ity_I1); 4512 assign( 'V', mce, resTmp, definedOfType(resTy) ); 4513 } 4514} 4515 4516 4517/*------------------------------------------------------------*/ 4518/*--- Memcheck main ---*/ 4519/*------------------------------------------------------------*/ 4520 4521static void schemeS ( MCEnv* mce, IRStmt* st ); 4522 4523static Bool isBogusAtom ( IRAtom* at ) 4524{ 4525 ULong n = 0; 4526 IRConst* con; 4527 tl_assert(isIRAtom(at)); 4528 if (at->tag == Iex_RdTmp) 4529 return False; 4530 tl_assert(at->tag == Iex_Const); 4531 con = at->Iex.Const.con; 4532 switch (con->tag) { 4533 case Ico_U1: return False; 4534 case Ico_U8: n = (ULong)con->Ico.U8; break; 4535 case Ico_U16: n = (ULong)con->Ico.U16; break; 4536 case Ico_U32: n = (ULong)con->Ico.U32; break; 4537 case Ico_U64: n = (ULong)con->Ico.U64; break; 4538 case Ico_F64: return False; 4539 case Ico_F64i: return False; 4540 case Ico_V128: return False; 4541 default: ppIRExpr(at); tl_assert(0); 4542 } 4543 /* VG_(printf)("%llx\n", n); */ 4544 return (/*32*/ n == 0xFEFEFEFFULL 4545 /*32*/ || n == 0x80808080ULL 4546 /*32*/ || n == 0x7F7F7F7FULL 4547 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 4548 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 4549 /*64*/ || n == 0x0000000000008080ULL 4550 /*64*/ || n == 0x8080808080808080ULL 4551 /*64*/ || n == 0x0101010101010101ULL 4552 ); 4553} 4554 4555static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 4556{ 4557 Int i; 4558 IRExpr* e; 4559 IRDirty* d; 4560 IRCAS* cas; 4561 switch (st->tag) { 4562 case Ist_WrTmp: 4563 e = st->Ist.WrTmp.data; 4564 switch (e->tag) { 4565 case Iex_Get: 4566 case Iex_RdTmp: 4567 return False; 4568 case Iex_Const: 4569 return isBogusAtom(e); 4570 case Iex_Unop: 4571 return isBogusAtom(e->Iex.Unop.arg); 4572 case Iex_GetI: 4573 return isBogusAtom(e->Iex.GetI.ix); 4574 case Iex_Binop: 4575 return isBogusAtom(e->Iex.Binop.arg1) 4576 || isBogusAtom(e->Iex.Binop.arg2); 4577 case Iex_Triop: 4578 return isBogusAtom(e->Iex.Triop.arg1) 4579 || isBogusAtom(e->Iex.Triop.arg2) 4580 || isBogusAtom(e->Iex.Triop.arg3); 4581 case Iex_Qop: 4582 return isBogusAtom(e->Iex.Qop.arg1) 4583 || isBogusAtom(e->Iex.Qop.arg2) 4584 || isBogusAtom(e->Iex.Qop.arg3) 4585 || isBogusAtom(e->Iex.Qop.arg4); 4586 case Iex_Mux0X: 4587 return isBogusAtom(e->Iex.Mux0X.cond) 4588 || isBogusAtom(e->Iex.Mux0X.expr0) 4589 || isBogusAtom(e->Iex.Mux0X.exprX); 4590 case Iex_Load: 4591 return isBogusAtom(e->Iex.Load.addr); 4592 case Iex_CCall: 4593 for (i = 0; e->Iex.CCall.args[i]; i++) 4594 if (isBogusAtom(e->Iex.CCall.args[i])) 4595 return True; 4596 return False; 4597 default: 4598 goto unhandled; 4599 } 4600 case Ist_Dirty: 4601 d = st->Ist.Dirty.details; 4602 for (i = 0; d->args[i]; i++) 4603 if (isBogusAtom(d->args[i])) 4604 return True; 4605 if (d->guard && isBogusAtom(d->guard)) 4606 return True; 4607 if (d->mAddr && isBogusAtom(d->mAddr)) 4608 return True; 4609 return False; 4610 case Ist_Put: 4611 return isBogusAtom(st->Ist.Put.data); 4612 case Ist_PutI: 4613 return isBogusAtom(st->Ist.PutI.ix) 4614 || isBogusAtom(st->Ist.PutI.data); 4615 case Ist_Store: 4616 return isBogusAtom(st->Ist.Store.addr) 4617 || isBogusAtom(st->Ist.Store.data); 4618 case Ist_Exit: 4619 return isBogusAtom(st->Ist.Exit.guard); 4620 case Ist_AbiHint: 4621 return isBogusAtom(st->Ist.AbiHint.base) 4622 || isBogusAtom(st->Ist.AbiHint.nia); 4623 case Ist_NoOp: 4624 case Ist_IMark: 4625 case Ist_MBE: 4626 return False; 4627 case Ist_CAS: 4628 cas = st->Ist.CAS.details; 4629 return isBogusAtom(cas->addr) 4630 || (cas->expdHi ? 
isBogusAtom(cas->expdHi) : False) 4631 || isBogusAtom(cas->expdLo) 4632 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 4633 || isBogusAtom(cas->dataLo); 4634 case Ist_LLSC: 4635 return isBogusAtom(st->Ist.LLSC.addr) 4636 || (st->Ist.LLSC.storedata 4637 ? isBogusAtom(st->Ist.LLSC.storedata) 4638 : False); 4639 default: 4640 unhandled: 4641 ppIRStmt(st); 4642 VG_(tool_panic)("hasBogusLiterals"); 4643 } 4644} 4645 4646 4647IRSB* MC_(instrument) ( VgCallbackClosure* closure, 4648 IRSB* sb_in, 4649 VexGuestLayout* layout, 4650 VexGuestExtents* vge, 4651 IRType gWordTy, IRType hWordTy ) 4652{ 4653 Bool verboze = 0||False; 4654 Bool bogus; 4655 Int i, j, first_stmt; 4656 IRStmt* st; 4657 MCEnv mce; 4658 IRSB* sb_out; 4659 4660 if (gWordTy != hWordTy) { 4661 /* We don't currently support this case. */ 4662 VG_(tool_panic)("host/guest word size mismatch"); 4663 } 4664 4665 /* Check we're not completely nuts */ 4666 tl_assert(sizeof(UWord) == sizeof(void*)); 4667 tl_assert(sizeof(Word) == sizeof(void*)); 4668 tl_assert(sizeof(Addr) == sizeof(void*)); 4669 tl_assert(sizeof(ULong) == 8); 4670 tl_assert(sizeof(Long) == 8); 4671 tl_assert(sizeof(Addr64) == 8); 4672 tl_assert(sizeof(UInt) == 4); 4673 tl_assert(sizeof(Int) == 4); 4674 4675 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 4676 4677 /* Set up SB */ 4678 sb_out = deepCopyIRSBExceptStmts(sb_in); 4679 4680 /* Set up the running environment. Both .sb and .tmpMap are 4681 modified as we go along. Note that tmps are added to both 4682 .sb->tyenv and .tmpMap together, so the valid index-set for 4683 those two arrays should always be identical. */ 4684 VG_(memset)(&mce, 0, sizeof(mce)); 4685 mce.sb = sb_out; 4686 mce.trace = verboze; 4687 mce.layout = layout; 4688 mce.hWordTy = hWordTy; 4689 mce.bogusLiterals = False; 4690 4691 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 4692 sizeof(TempMapEnt)); 4693 for (i = 0; i < sb_in->tyenv->types_used; i++) { 4694 TempMapEnt ent; 4695 ent.kind = Orig; 4696 ent.shadowV = IRTemp_INVALID; 4697 ent.shadowB = IRTemp_INVALID; 4698 VG_(addToXA)( mce.tmpMap, &ent ); 4699 } 4700 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 4701 4702 /* Make a preliminary inspection of the statements, to see if there 4703 are any dodgy-looking literals. If there are, we generate 4704 extra-detailed (hence extra-expensive) instrumentation in 4705 places. Scan the whole bb even if dodgyness is found earlier, 4706 so that the flatness assertion is applied to all stmts. */ 4707 4708 bogus = False; 4709 4710 for (i = 0; i < sb_in->stmts_used; i++) { 4711 4712 st = sb_in->stmts[i]; 4713 tl_assert(st); 4714 tl_assert(isFlatIRStmt(st)); 4715 4716 if (!bogus) { 4717 bogus = checkForBogusLiterals(st); 4718 if (0 && bogus) { 4719 VG_(printf)("bogus: "); 4720 ppIRStmt(st); 4721 VG_(printf)("\n"); 4722 } 4723 } 4724 4725 } 4726 4727 mce.bogusLiterals = bogus; 4728 4729 /* Copy verbatim any IR preamble preceding the first IMark */ 4730 4731 tl_assert(mce.sb == sb_out); 4732 tl_assert(mce.sb != sb_in); 4733 4734 i = 0; 4735 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 4736 4737 st = sb_in->stmts[i]; 4738 tl_assert(st); 4739 tl_assert(isFlatIRStmt(st)); 4740 4741 stmt( 'C', &mce, sb_in->stmts[i] ); 4742 i++; 4743 } 4744 4745 /* Nasty problem. IR optimisation of the pre-instrumented IR may 4746 cause the IR following the preamble to contain references to IR 4747 temporaries defined in the preamble. 
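      (That is, to temporaries assigned before the first IMark, which
      were just copied through unchanged above.)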
Because the preamble isn't 4748 instrumented, these temporaries don't have any shadows. 4749 Nevertheless uses of them following the preamble will cause 4750 memcheck to generate references to their shadows. End effect is 4751 to cause IR sanity check failures, due to references to 4752 non-existent shadows. This is only evident for the complex 4753 preambles used for function wrapping on TOC-afflicted platforms 4754 (ppc64-linux, ppc32-aix5, ppc64-aix5). 4755 4756 The following loop therefore scans the preamble looking for 4757 assignments to temporaries. For each one found it creates an 4758 assignment to the corresponding (V) shadow temp, marking it as 4759 'defined'. This is the same resulting IR as if the main 4760 instrumentation loop before had been applied to the statement 4761 'tmp = CONSTANT'. 4762 4763 Similarly, if origin tracking is enabled, we must generate an 4764 assignment for the corresponding origin (B) shadow, claiming 4765 no-origin, as appropriate for a defined value. 4766 */ 4767 for (j = 0; j < i; j++) { 4768 if (sb_in->stmts[j]->tag == Ist_WrTmp) { 4769 /* findShadowTmpV checks its arg is an original tmp; 4770 no need to assert that here. */ 4771 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 4772 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 4773 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 4774 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 4775 if (MC_(clo_mc_level) == 3) { 4776 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 4777 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 4778 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 4779 } 4780 if (0) { 4781 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 4782 ppIRType( ty_v ); 4783 VG_(printf)("\n"); 4784 } 4785 } 4786 } 4787 4788 /* Iterate over the remaining stmts to generate instrumentation. */ 4789 4790 tl_assert(sb_in->stmts_used > 0); 4791 tl_assert(i >= 0); 4792 tl_assert(i < sb_in->stmts_used); 4793 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 4794 4795 for (/* use current i*/; i < sb_in->stmts_used; i++) { 4796 4797 st = sb_in->stmts[i]; 4798 first_stmt = sb_out->stmts_used; 4799 4800 if (verboze) { 4801 VG_(printf)("\n"); 4802 ppIRStmt(st); 4803 VG_(printf)("\n"); 4804 } 4805 4806 if (MC_(clo_mc_level) == 3) { 4807 /* See comments on case Ist_CAS below. */ 4808 if (st->tag != Ist_CAS) 4809 schemeS( &mce, st ); 4810 } 4811 4812 /* Generate instrumentation code for each stmt ... 
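         For example (sketch only): an original 't3 = Add32(t1,t2)'
         gains a parallel assignment of Add32's V bits to t3's shadow
         temp via expr2vbits; an original PUT gets a matching shadow
         PUT via do_shadow_PUT; and so on, case by case.  The original
         statement itself is copied to the output afterwards, except
         for IRCAS, which do_shadow_CAS copies itself.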
*/ 4813 4814 switch (st->tag) { 4815 4816 case Ist_WrTmp: 4817 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 4818 expr2vbits( &mce, st->Ist.WrTmp.data) ); 4819 break; 4820 4821 case Ist_Put: 4822 do_shadow_PUT( &mce, 4823 st->Ist.Put.offset, 4824 st->Ist.Put.data, 4825 NULL /* shadow atom */ ); 4826 break; 4827 4828 case Ist_PutI: 4829 do_shadow_PUTI( &mce, 4830 st->Ist.PutI.descr, 4831 st->Ist.PutI.ix, 4832 st->Ist.PutI.bias, 4833 st->Ist.PutI.data ); 4834 break; 4835 4836 case Ist_Store: 4837 do_shadow_Store( &mce, st->Ist.Store.end, 4838 st->Ist.Store.addr, 0/* addr bias */, 4839 st->Ist.Store.data, 4840 NULL /* shadow data */, 4841 NULL/*guard*/ ); 4842 break; 4843 4844 case Ist_Exit: 4845 complainIfUndefined( &mce, st->Ist.Exit.guard ); 4846 break; 4847 4848 case Ist_IMark: 4849 break; 4850 4851 case Ist_NoOp: 4852 case Ist_MBE: 4853 break; 4854 4855 case Ist_Dirty: 4856 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 4857 break; 4858 4859 case Ist_AbiHint: 4860 do_AbiHint( &mce, st->Ist.AbiHint.base, 4861 st->Ist.AbiHint.len, 4862 st->Ist.AbiHint.nia ); 4863 break; 4864 4865 case Ist_CAS: 4866 do_shadow_CAS( &mce, st->Ist.CAS.details ); 4867 /* Note, do_shadow_CAS copies the CAS itself to the output 4868 block, because it needs to add instrumentation both 4869 before and after it. Hence skip the copy below. Also 4870 skip the origin-tracking stuff (call to schemeS) above, 4871 since that's all tangled up with it too; do_shadow_CAS 4872 does it all. */ 4873 break; 4874 4875 case Ist_LLSC: 4876 do_shadow_LLSC( &mce, 4877 st->Ist.LLSC.end, 4878 st->Ist.LLSC.result, 4879 st->Ist.LLSC.addr, 4880 st->Ist.LLSC.storedata ); 4881 break; 4882 4883 default: 4884 VG_(printf)("\n"); 4885 ppIRStmt(st); 4886 VG_(printf)("\n"); 4887 VG_(tool_panic)("memcheck: unhandled IRStmt"); 4888 4889 } /* switch (st->tag) */ 4890 4891 if (0 && verboze) { 4892 for (j = first_stmt; j < sb_out->stmts_used; j++) { 4893 VG_(printf)(" "); 4894 ppIRStmt(sb_out->stmts[j]); 4895 VG_(printf)("\n"); 4896 } 4897 VG_(printf)("\n"); 4898 } 4899 4900 /* ... and finally copy the stmt itself to the output. Except, 4901 skip the copy of IRCASs; see comments on case Ist_CAS 4902 above. */ 4903 if (st->tag != Ist_CAS) 4904 stmt('C', &mce, st); 4905 } 4906 4907 /* Now we need to complain if the jump target is undefined. */ 4908 first_stmt = sb_out->stmts_used; 4909 4910 if (verboze) { 4911 VG_(printf)("sb_in->next = "); 4912 ppIRExpr(sb_in->next); 4913 VG_(printf)("\n\n"); 4914 } 4915 4916 complainIfUndefined( &mce, sb_in->next ); 4917 4918 if (0 && verboze) { 4919 for (j = first_stmt; j < sb_out->stmts_used; j++) { 4920 VG_(printf)(" "); 4921 ppIRStmt(sb_out->stmts[j]); 4922 VG_(printf)("\n"); 4923 } 4924 VG_(printf)("\n"); 4925 } 4926 4927 /* If this fails, there's been some serious snafu with tmp management, 4928 that should be investigated. */ 4929 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 4930 VG_(deleteXA)( mce.tmpMap ); 4931 4932 tl_assert(mce.sb == sb_out); 4933 return sb_out; 4934} 4935 4936/*------------------------------------------------------------*/ 4937/*--- Post-tree-build final tidying ---*/ 4938/*------------------------------------------------------------*/ 4939 4940/* This exploits the observation that Memcheck often produces 4941 repeated conditional calls of the form 4942 4943 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 4944 4945 with the same guard expression G guarding the same helper call. 4946 The second and subsequent calls are redundant. 
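   (If G holds, the first such call already reports the
   undefined-value error; a second call under the same guard merely
   repeats it.)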
This usually 4947 results from instrumentation of guest code containing multiple 4948 memory references at different constant offsets from the same base 4949 register. After optimisation of the instrumentation, you get a 4950 test for the definedness of the base register for each memory 4951 reference, which is kinda pointless. MC_(final_tidy) therefore 4952 looks for such repeated calls and removes all but the first. */ 4953 4954/* A struct for recording which (helper, guard) pairs we have already 4955 seen. */ 4956typedef 4957 struct { void* entry; IRExpr* guard; } 4958 Pair; 4959 4960/* Return True if e1 and e2 definitely denote the same value (used to 4961 compare guards). Return False if unknown; False is the safe 4962 answer. Since guest registers and guest memory do not have the 4963 SSA property we must return False if any Gets or Loads appear in 4964 the expression. */ 4965 4966static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 4967{ 4968 if (e1->tag != e2->tag) 4969 return False; 4970 switch (e1->tag) { 4971 case Iex_Const: 4972 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 4973 case Iex_Binop: 4974 return e1->Iex.Binop.op == e2->Iex.Binop.op 4975 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 4976 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 4977 case Iex_Unop: 4978 return e1->Iex.Unop.op == e2->Iex.Unop.op 4979 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 4980 case Iex_RdTmp: 4981 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 4982 case Iex_Mux0X: 4983 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond ) 4984 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 ) 4985 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX ); 4986 case Iex_Qop: 4987 case Iex_Triop: 4988 case Iex_CCall: 4989 /* be lazy. Could define equality for these, but they never 4990 appear to be used. */ 4991 return False; 4992 case Iex_Get: 4993 case Iex_GetI: 4994 case Iex_Load: 4995 /* be conservative - these may not give the same value each 4996 time */ 4997 return False; 4998 case Iex_Binder: 4999 /* should never see this */ 5000 /* fallthrough */ 5001 default: 5002 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 5003 ppIRExpr(e1); 5004 VG_(tool_panic)("memcheck:sameIRValue"); 5005 return False; 5006 } 5007} 5008 5009/* See if 'pairs' already has an entry for (entry, guard). Return 5010 True if so. If not, add an entry. 
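   A linear scan suffices: 'pairs' only ever holds the distinct
   (helper, guard) combinations seen so far in a single superblock,
   which is a small number.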
*/ 5011 5012static 5013Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 5014{ 5015 Pair p; 5016 Pair* pp; 5017 Int i, n = VG_(sizeXA)( pairs ); 5018 for (i = 0; i < n; i++) { 5019 pp = VG_(indexXA)( pairs, i ); 5020 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 5021 return True; 5022 } 5023 p.guard = guard; 5024 p.entry = entry; 5025 VG_(addToXA)( pairs, &p ); 5026 return False; 5027} 5028 5029static Bool is_helperc_value_checkN_fail ( HChar* name ) 5030{ 5031 return 5032 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 5033 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 5034 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 5035 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 5036 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 5037 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 5038 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 5039 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 5040} 5041 5042IRSB* MC_(final_tidy) ( IRSB* sb_in ) 5043{ 5044 Int i; 5045 IRStmt* st; 5046 IRDirty* di; 5047 IRExpr* guard; 5048 IRCallee* cee; 5049 Bool alreadyPresent; 5050 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 5051 VG_(free), sizeof(Pair) ); 5052 /* Scan forwards through the statements. Each time a call to one 5053 of the relevant helpers is seen, check if we have made a 5054 previous call to the same helper using the same guard 5055 expression, and if so, delete the call. */ 5056 for (i = 0; i < sb_in->stmts_used; i++) { 5057 st = sb_in->stmts[i]; 5058 tl_assert(st); 5059 if (st->tag != Ist_Dirty) 5060 continue; 5061 di = st->Ist.Dirty.details; 5062 guard = di->guard; 5063 if (!guard) 5064 continue; 5065 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 5066 cee = di->cee; 5067 if (!is_helperc_value_checkN_fail( cee->name )) 5068 continue; 5069 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 5070 guard 'guard'. Check if we have already seen a call to this 5071 function with the same guard. If so, delete it. If not, 5072 add it to the set of calls we do know about. */ 5073 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 5074 if (alreadyPresent) { 5075 sb_in->stmts[i] = IRStmt_NoOp(); 5076 if (0) VG_(printf)("XX\n"); 5077 } 5078 } 5079 VG_(deleteXA)( pairs ); 5080 return sb_in; 5081} 5082 5083 5084/*------------------------------------------------------------*/ 5085/*--- Origin tracking stuff ---*/ 5086/*------------------------------------------------------------*/ 5087 5088/* Almost identical to findShadowTmpV. */ 5089static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 5090{ 5091 TempMapEnt* ent; 5092 /* VG_(indexXA) range-checks 'orig', hence no need to check 5093 here. */ 5094 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5095 tl_assert(ent->kind == Orig); 5096 if (ent->shadowB == IRTemp_INVALID) { 5097 IRTemp tmpB 5098 = newTemp( mce, Ity_I32, BSh ); 5099 /* newTemp may cause mce->tmpMap to resize, hence previous results 5100 from VG_(indexXA) are invalid. 
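         Hence 'ent' is re-fetched immediately below before being
         updated.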
*/ 5101 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5102 tl_assert(ent->kind == Orig); 5103 tl_assert(ent->shadowB == IRTemp_INVALID); 5104 ent->shadowB = tmpB; 5105 } 5106 return ent->shadowB; 5107} 5108 5109static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 5110{ 5111 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 5112} 5113 5114static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5115 IRAtom* baseaddr, Int offset ) 5116{ 5117 void* hFun; 5118 HChar* hName; 5119 IRTemp bTmp; 5120 IRDirty* di; 5121 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5122 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5123 IRAtom* ea = baseaddr; 5124 if (offset != 0) { 5125 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5126 : mkU64( (Long)(Int)offset ); 5127 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5128 } 5129 bTmp = newTemp(mce, mce->hWordTy, BSh); 5130 5131 switch (szB) { 5132 case 1: hFun = (void*)&MC_(helperc_b_load1); 5133 hName = "MC_(helperc_b_load1)"; 5134 break; 5135 case 2: hFun = (void*)&MC_(helperc_b_load2); 5136 hName = "MC_(helperc_b_load2)"; 5137 break; 5138 case 4: hFun = (void*)&MC_(helperc_b_load4); 5139 hName = "MC_(helperc_b_load4)"; 5140 break; 5141 case 8: hFun = (void*)&MC_(helperc_b_load8); 5142 hName = "MC_(helperc_b_load8)"; 5143 break; 5144 case 16: hFun = (void*)&MC_(helperc_b_load16); 5145 hName = "MC_(helperc_b_load16)"; 5146 break; 5147 default: 5148 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 5149 tl_assert(0); 5150 } 5151 di = unsafeIRDirty_1_N( 5152 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 5153 mkIRExprVec_1( ea ) 5154 ); 5155 /* no need to mess with any annotations. This call accesses 5156 neither guest state nor guest memory. */ 5157 stmt( 'B', mce, IRStmt_Dirty(di) ); 5158 if (mce->hWordTy == Ity_I64) { 5159 /* 64-bit host */ 5160 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 5161 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 5162 return mkexpr(bTmp32); 5163 } else { 5164 /* 32-bit host */ 5165 return mkexpr(bTmp); 5166 } 5167} 5168 5169/* Generate a shadow store. guard :: Ity_I1 controls whether the 5170 store really happens; NULL means it unconditionally does. */ 5171static void gen_store_b ( MCEnv* mce, Int szB, 5172 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5173 IRAtom* guard ) 5174{ 5175 void* hFun; 5176 HChar* hName; 5177 IRDirty* di; 5178 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5179 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5180 IRAtom* ea = baseaddr; 5181 if (guard) { 5182 tl_assert(isOriginalAtom(mce, guard)); 5183 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5184 } 5185 if (offset != 0) { 5186 IRAtom* off = aTy == Ity_I32 ? 
mkU32( offset ) 5187 : mkU64( (Long)(Int)offset ); 5188 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5189 } 5190 if (mce->hWordTy == Ity_I64) 5191 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); 5192 5193 switch (szB) { 5194 case 1: hFun = (void*)&MC_(helperc_b_store1); 5195 hName = "MC_(helperc_b_store1)"; 5196 break; 5197 case 2: hFun = (void*)&MC_(helperc_b_store2); 5198 hName = "MC_(helperc_b_store2)"; 5199 break; 5200 case 4: hFun = (void*)&MC_(helperc_b_store4); 5201 hName = "MC_(helperc_b_store4)"; 5202 break; 5203 case 8: hFun = (void*)&MC_(helperc_b_store8); 5204 hName = "MC_(helperc_b_store8)"; 5205 break; 5206 case 16: hFun = (void*)&MC_(helperc_b_store16); 5207 hName = "MC_(helperc_b_store16)"; 5208 break; 5209 default: 5210 tl_assert(0); 5211 } 5212 di = unsafeIRDirty_0_N( 2/*regparms*/, 5213 hName, VG_(fnptr_to_fnentry)( hFun ), 5214 mkIRExprVec_2( ea, dataB ) 5215 ); 5216 /* no need to mess with any annotations. This call accesses 5217 neither guest state nor guest memory. */ 5218 if (guard) di->guard = guard; 5219 stmt( 'B', mce, IRStmt_Dirty(di) ); 5220} 5221 5222static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { 5223 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5224 if (eTy == Ity_I64) 5225 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); 5226 if (eTy == Ity_I32) 5227 return e; 5228 tl_assert(0); 5229} 5230 5231static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { 5232 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5233 tl_assert(eTy == Ity_I32); 5234 if (dstTy == Ity_I64) 5235 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); 5236 tl_assert(0); 5237} 5238 5239 5240static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) 5241{ 5242 tl_assert(MC_(clo_mc_level) == 3); 5243 5244 switch (e->tag) { 5245 5246 case Iex_GetI: { 5247 IRRegArray* descr_b; 5248 IRAtom *t1, *t2, *t3, *t4; 5249 IRRegArray* descr = e->Iex.GetI.descr; 5250 IRType equivIntTy 5251 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 5252 /* If this array is unshadowable for whatever reason, use the 5253 usual approximation. */ 5254 if (equivIntTy == Ity_INVALID) 5255 return mkU32(0); 5256 tl_assert(sizeofIRType(equivIntTy) >= 4); 5257 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 5258 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 5259 equivIntTy, descr->nElems ); 5260 /* Do a shadow indexed get of the same size, giving t1. Take 5261 the bottom 32 bits of it, giving t2. Compute into t3 the 5262 origin for the index (almost certainly zero, but there's 5263 no harm in being completely general here, since iropt will 5264 remove any useless code), and fold it in, giving a final 5265 value t4. */ 5266 t1 = assignNew( 'B', mce, equivIntTy, 5267 IRExpr_GetI( descr_b, e->Iex.GetI.ix, 5268 e->Iex.GetI.bias )); 5269 t2 = narrowTo32( mce, t1 ); 5270 t3 = schemeE( mce, e->Iex.GetI.ix ); 5271 t4 = gen_maxU32( mce, t2, t3 ); 5272 return t4; 5273 } 5274 case Iex_CCall: { 5275 Int i; 5276 IRAtom* here; 5277 IRExpr** args = e->Iex.CCall.args; 5278 IRAtom* curr = mkU32(0); 5279 for (i = 0; args[i]; i++) { 5280 tl_assert(i < 32); 5281 tl_assert(isOriginalAtom(mce, args[i])); 5282 /* Only take notice of this arg if the callee's 5283 mc-exclusion mask does not say it is to be excluded. */ 5284 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) { 5285 /* the arg is to be excluded from definedness checking. 5286 Do nothing. 
*/ 5287 if (0) VG_(printf)("excluding %s(%d)\n", 5288 e->Iex.CCall.cee->name, i); 5289 } else { 5290 /* calculate the arg's definedness, and pessimistically 5291 merge it in. */ 5292 here = schemeE( mce, args[i] ); 5293 curr = gen_maxU32( mce, curr, here ); 5294 } 5295 } 5296 return curr; 5297 } 5298 case Iex_Load: { 5299 Int dszB; 5300 dszB = sizeofIRType(e->Iex.Load.ty); 5301 /* assert that the B value for the address is already 5302 available (somewhere) */ 5303 tl_assert(isIRAtom(e->Iex.Load.addr)); 5304 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); 5305 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); 5306 } 5307 case Iex_Mux0X: { 5308 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond ); 5309 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 ); 5310 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX ); 5311 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 )); 5312 } 5313 case Iex_Qop: { 5314 IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 ); 5315 IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 ); 5316 IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 ); 5317 IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 ); 5318 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ), 5319 gen_maxU32( mce, b3, b4 ) ); 5320 } 5321 case Iex_Triop: { 5322 IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 ); 5323 IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 ); 5324 IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 ); 5325 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) ); 5326 } 5327 case Iex_Binop: { 5328 switch (e->Iex.Binop.op) { 5329 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 5330 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 5331 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 5332 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 5333 /* Just say these all produce a defined result, 5334 regardless of their arguments. See 5335 COMMENT_ON_CasCmpEQ in this file. */ 5336 return mkU32(0); 5337 default: { 5338 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 ); 5339 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 ); 5340 return gen_maxU32( mce, b1, b2 ); 5341 } 5342 } 5343 tl_assert(0); 5344 /*NOTREACHED*/ 5345 } 5346 case Iex_Unop: { 5347 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg ); 5348 return b1; 5349 } 5350 case Iex_Const: 5351 return mkU32(0); 5352 case Iex_RdTmp: 5353 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp )); 5354 case Iex_Get: { 5355 Int b_offset = MC_(get_otrack_shadow_offset)( 5356 e->Iex.Get.offset, 5357 sizeofIRType(e->Iex.Get.ty) 5358 ); 5359 tl_assert(b_offset >= -1 5360 && b_offset <= mce->layout->total_sizeB -4); 5361 if (b_offset >= 0) { 5362 /* FIXME: this isn't an atom! */ 5363 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, 5364 Ity_I32 ); 5365 } 5366 return mkU32(0); 5367 } 5368 default: 5369 VG_(printf)("mc_translate.c: schemeE: unhandled: "); 5370 ppIRExpr(e); 5371 VG_(tool_panic)("memcheck:schemeE"); 5372 } 5373} 5374 5375 5376static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) 5377{ 5378 // This is a hacked version of do_shadow_Dirty 5379 Int i, n, toDo, gSz, gOff; 5380 IRAtom *here, *curr; 5381 IRTemp dst; 5382 5383 /* First check the guard. */ 5384 curr = schemeE( mce, d->guard ); 5385 5386 /* Now round up all inputs and maxU32 over them. */ 5387 5388 /* Inputs: unmasked args */ 5389 for (i = 0; d->args[i]; i++) { 5390 if (d->cee->mcx_mask & (1<<i)) { 5391 /* ignore this arg */ 5392 } else { 5393 here = schemeE( mce, d->args[i] ); 5394 curr = gen_maxU32( mce, curr, here ); 5395 } 5396 } 5397 5398 /* Inputs: guest state that we read. 
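      Handled in 4-byte chunks, since origin tags are 32 bits wide;
      compare the 8-byte chunking used for the V bits in
      do_shadow_Dirty.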
*/ 5399 for (i = 0; i < d->nFxState; i++) { 5400 tl_assert(d->fxState[i].fx != Ifx_None); 5401 if (d->fxState[i].fx == Ifx_Write) 5402 continue; 5403 5404 /* Ignore any sections marked as 'always defined'. */ 5405 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) { 5406 if (0) 5407 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 5408 d->fxState[i].offset, d->fxState[i].size ); 5409 continue; 5410 } 5411 5412 /* This state element is read or modified. So we need to 5413 consider it. If larger than 4 bytes, deal with it in 4-byte 5414 chunks. */ 5415 gSz = d->fxState[i].size; 5416 gOff = d->fxState[i].offset; 5417 tl_assert(gSz > 0); 5418 while (True) { 5419 Int b_offset; 5420 if (gSz == 0) break; 5421 n = gSz <= 4 ? gSz : 4; 5422 /* update 'curr' with maxU32 of the state slice 5423 gOff .. gOff+n-1 */ 5424 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 5425 if (b_offset != -1) { 5426 here = assignNew( 'B',mce, 5427 Ity_I32, 5428 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB, 5429 Ity_I32)); 5430 curr = gen_maxU32( mce, curr, here ); 5431 } 5432 gSz -= n; 5433 gOff += n; 5434 } 5435 5436 } 5437 5438 /* Inputs: memory */ 5439 5440 if (d->mFx != Ifx_None) { 5441 /* Because we may do multiple shadow loads/stores from the same 5442 base address, it's best to do a single test of its 5443 definedness right now. Post-instrumentation optimisation 5444 should remove all but this test. */ 5445 tl_assert(d->mAddr); 5446 here = schemeE( mce, d->mAddr ); 5447 curr = gen_maxU32( mce, curr, here ); 5448 } 5449 5450 /* Deal with memory inputs (reads or modifies) */ 5451 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 5452 toDo = d->mSize; 5453 /* chew off 32-bit chunks. We don't care about the endianness 5454 since it's all going to be condensed down to a single bit, 5455 but nevertheless choose an endianness which is hopefully 5456 native to the platform. */ 5457 while (toDo >= 4) { 5458 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo ); 5459 curr = gen_maxU32( mce, curr, here ); 5460 toDo -= 4; 5461 } 5462 /* handle possible 16-bit excess */ 5463 while (toDo >= 2) { 5464 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo ); 5465 curr = gen_maxU32( mce, curr, here ); 5466 toDo -= 2; 5467 } 5468 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 5469 } 5470 5471 /* Whew! So curr is a 32-bit B-value which should give an origin 5472 of some use if any of the inputs to the helper are undefined. 5473 Now we need to re-distribute the results to all destinations. */ 5474 5475 /* Outputs: the destination temporary, if there is one. */ 5476 if (d->tmp != IRTemp_INVALID) { 5477 dst = findShadowTmpB(mce, d->tmp); 5478 assign( 'V', mce, dst, curr ); 5479 } 5480 5481 /* Outputs: guest state that we write or modify. */ 5482 for (i = 0; i < d->nFxState; i++) { 5483 tl_assert(d->fxState[i].fx != Ifx_None); 5484 if (d->fxState[i].fx == Ifx_Read) 5485 continue; 5486 5487 /* Ignore any sections marked as 'always defined'. */ 5488 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) 5489 continue; 5490 5491 /* This state element is written or modified. So we need to 5492 consider it. If larger than 4 bytes, deal with it in 4-byte 5493 chunks. */ 5494 gSz = d->fxState[i].size; 5495 gOff = d->fxState[i].offset; 5496 tl_assert(gSz > 0); 5497 while (True) { 5498 Int b_offset; 5499 if (gSz == 0) break; 5500 n = gSz <= 4 ? gSz : 4; 5501 /* Write 'curr' to the state slice gOff .. 
   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;

      /* This state element is written or modified.  So we need to
         consider it.  If larger than 4 bytes, deal with it in 4-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         Int b_offset;
         if (gSz == 0) break;
         n = gSz <= 4 ? gSz : 4;
         /* Write 'curr' to the state slice gOff .. gOff+n-1 */
         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
         if (b_offset != -1) {
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       curr ));
         }
         gSz  -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
}


static void do_origins_Store ( MCEnv* mce,
                               IREndness stEnd,
                               IRExpr* stAddr,
                               IRExpr* stData )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
                NULL/*guard*/ );
}
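
/* schemeS is the statement-level counterpart of schemeE: for each
   kind of IRStmt it emits whatever extra IR is needed to keep the
   origin-tag ("B") shadow state in step, mirroring the structure of
   the V-bits (definedness) instrumentation earlier in this file.
   For example (illustrative only), a statement

      t7 = Add32(t3,t5)

   is handled by the Ist_WrTmp case below: t7's B-shadow temp is
   assigned the gen_maxU32 of the B-shadows of t3 and t5, as computed
   by the Iex_Binop case of schemeE. */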

static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = st->Ist.PutI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, st->Ist.PutI.data );
         t2 = schemeE( mce, st->Ist.PutI.ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
                                      st->Ist.PutI.bias, t4 ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/