/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics.  Currently only for
   sameIRExprs. */
#define STATS_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is up to date only as explained above
     (i.e. at SB exits and as specified by dirty helper call).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as minimal, but all the guest state is up to date at memory
     exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     Guest state is up to date at each instruction.
   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong: PutIs are assumed not to
   write parts of the guest state that we need to have up-to-date at
   loads/stores.  So far on the x86 guest that has not mattered,
   since indeed only the x87 FP registers and tags are accessed using
   GetI/PutI, and there is no need so far for them to be up to date
   at mem exception points.  The rPutI pass should be fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   CSE up F64 literals (already doing F64is).

   CSE: consider carefully the requirement for precise exceptions
     prior to making CSE any more aggressive.  */

/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   done by hashing, but it's not clear whether or not this would
   really be any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
   h->size  = 8;
   h->used  = 0;
   h->inuse = LibVEX_Alloc_inline(h->size * sizeof(Bool));
   h->key   = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   h->val   = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
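
/* A minimal usage sketch (illustrative only; not part of the passes
   below): given
      HashHW* h = newHHW();
      HWord   v;
      addToHHW(h, 0x10, 0x99);
      addToHHW(h, 0x10, 0xAA);
   a call lookupHHW(h, &v, 0x10) succeeds with v == 0xAA, since the
   second addToHHW replaced the first binding.  Lookup is a linear
   scan, so this map only makes sense for the small sizes iropt
   needs. */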


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
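
/* For example (illustrative only; temp numbering depends on the
   block's tyenv): flattening the non-atomic expression
      Add32(GET:I32(8), 0x4:I32)
   appends
      t1 = GET:I32(8)
      t2 = Add32(t1,0x4:I32)
   to 'bb' and returns RdTmp(t2).  The GET is itself lifted onto a
   temp because Iex_Get is not an IR atom, whereas the ordinary
   constant is left in place. */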

/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
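
/* For example (illustrative only): a 4-byte Put at guest offset 24
   covers guest state bytes (24 .. 27), so mk_key_GetPut(24, Ity_I32)
   yields the key (24 << 16) | 27 == 0x18001B. */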

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                      (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
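
/* For example (illustrative only, types permitting), in
      t1 = GET:I32(0)
      t2 = GET:I32(0)
      PUT(0) = t3
      t4 = GET:I32(0)
   the second GET is rewritten to 't2 = t1', since the env already
   binds (0,3) to t1.  The PUT invalidates that binding but then
   installs (0,3) :-> t3, so the final GET is rewritten to
   't4 = t3'. */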


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all
         parts of the environment corresponding to guest state that may
         not be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add
               it to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}
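
/* For example (illustrative only), scanning backwards through
      PUT(0) = t1
      t2 = GET:I32(8)
      PUT(0) = t3
   the later PUT(0) is seen first, so (0,3) enters the set of ranges
   whose next event is a write; the GET(8) does not disturb it, so the
   earlier PUT(0) is replaced by a no-op.  A conditional exit, or a
   GET(0), in between would have prevented the removal. */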


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs
   invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value?  The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do.  The reason is that we do not
   keep track of changes in the guest state and memory.  Thus, two
   Get's, GetI's or Load's, even when accessing the same location, will
   be assumed to compute different values.  After all, the accesses may
   happen at different times and the guest state / memory can have
   changed in the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the
            meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                            e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                            e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                            e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                         e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                            e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                            e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}

inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */

   return same;
}
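
/* For example (illustrative only): if the env binds
      t10 :-> Add32(t7,t8)   and   t11 :-> Add32(t7,t8)
   then sameIRExprs(env, RdTmp(t10), RdTmp(t11)) returns True, by
   recursing through the two bindings.  By contrast, two textually
   identical GET expressions always compare not-same, per the
   conservative rule above. */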


/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   Currently unused; commented out to avoid compiler warning */
#if 0
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}
#endif

/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0x0000);
}

/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V256
                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
}

/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:   return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:   return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16:  return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32:  return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64:  return toBool( e->Iex.Const.con->Ico.U64 == 0);
      case Ico_V256: return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
      default: vpanic("isZeroU");
   }
}

/* Is this an integer constant with value 1---1b ? */
static Bool isOnesU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U8:   return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16:  return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32:  return toBool( e->Iex.Const.con->Ico.U32
                                    == 0xFFFFFFFF);
      case Ico_U64:  return toBool( e->Iex.Const.con->Ico.U64
                                    == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:    return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16:   return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32:   return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64:   return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_XorV256:
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
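
/* For example (illustrative only): fold_Clz32(0x00010000) == 15,
   since bit 16 is the most significant set bit and there are 15 zero
   bits above it.  Callers must never pass zero; both helpers assert
   on it. */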

/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
//   ULong r = 0;
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
//   return r;
//}

/* Helper for arbitrary expression pattern matching in flat IR.  If
   'e' is a reference to a tmp, look it up in env -- repeatedly, if
   necessary -- until it resolves to a non-tmp.  Note that this can
   return NULL if it can't resolve 'e' to a new expression, which will
   be the case if 'e' is instead defined by an IRStmt (IRDirty or
   LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Why is this loop guaranteed to terminate?  Because all tmps must
      have definitions before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   while (e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
      if (e == NULL) break;
   }
   return e;
}

/* Similar to |chase|, but follows at most one level of tmp
   reference. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
{
   if (e == NULL || e->tag != Iex_RdTmp)
      return e;
   else
      return env[(Int)e->Iex.RdTmp.tmp];
}
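
/* For example (illustrative only): with env bindings
      t4 :-> RdTmp(t3)   and   t3 :-> GET:I32(16)
   chase(env, RdTmp(t4)) follows both links and returns the GET
   expression, whereas chase1(env, RdTmp(t4)) stops after one step
   and returns RdTmp(t3). */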

static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   switch (e->tag) {
      case Iex_Unop:
         /* UNARY ops */
         if (e->Iex.Unop.arg->tag == Iex_Const) {
            switch (e->Iex.Unop.op) {
               case Iop_1Uto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0)));
                  break;
               case Iop_1Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;
               case Iop_1Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;

               case Iop_1Sto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFF : 0)));
                  break;
               case Iop_1Sto16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFF : 0)));
                  break;
               case Iop_1Sto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFF : 0));
                  break;
               case Iop_1Sto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFFFFFFFFFFULL : 0));
                  break;

               case Iop_8Sto32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  u32 <<= 24;
                  u32 = (Int)u32 >> 24;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U32(u32));
                  break;
               }
               case Iop_16Sto32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  u32 <<= 16;
                  u32 = (Int)u32 >> 16;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U32(u32));
                  break;
               }
               case Iop_8Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_8Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_8Sto16: {
                  UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  u16 <<= 8;
                  u16 = (Short)u16 >> 8;  /* signed shift */
                  e2 = IRExpr_Const(IRConst_U16(u16));
                  break;
               }
               case Iop_8Uto16:
                  e2 = IRExpr_Const(IRConst_U16(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_32to16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_64to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                       )));
                  break;

               case Iop_NotV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
                  break;
               case Iop_Not64:
                  e2 = IRExpr_Const(IRConst_U64(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_Not32:
                  e2 = IRExpr_Const(IRConst_U32(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Not16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Not8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
                  break;

               case Iop_Not1:
                  e2 = IRExpr_Const(IRConst_U1(
                          notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
                  break;

               case Iop_64to8: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFULL;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
                  break;
               }
               case Iop_64to16: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFFFULL;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
                  break;
               }
               case Iop_64to32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0x00000000FFFFFFFFULL;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_64HIto32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_32Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFFFFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
                  break;
               case Iop_16Sto64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  u64 <<= 48;
                  u64 = (Long)u64 >> 48;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U64(u64));
                  break;
               }
               case Iop_32Sto64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  u64 <<= 32;
                  u64 = (Long)u64 >> 32;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U64(u64));
                  break;
               }

               case Iop_16to8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }
               case Iop_16HIto8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 >>= 8;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }

               case Iop_CmpNEZ8:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                       )));
                  break;
               case Iop_CmpNEZ32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_CmpNEZ64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                       )));
                  break;

               case Iop_CmpwNEZ32: {
                  UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (w32 == 0)
                     e2 = IRExpr_Const(IRConst_U32( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
                  break;
               }
               case Iop_CmpwNEZ64: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (w64 == 0)
                     e2 = IRExpr_Const(IRConst_U64( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
                  break;
               }

               case Iop_Left32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  Int  s32 = (Int)(u32 & 0xFFFFFFFF);
                  s32 = (s32 | (-s32));
                  e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
                  break;
               }

               case Iop_Left64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  Long  s64 = (Long)u64;
                  s64 = (s64 | (-s64));
                  e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
                  break;
               }

               case Iop_Clz32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (u32 != 0)
                     e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
                  break;
               }
               case Iop_Clz64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (u64 != 0ULL)
                     e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
                  break;
               }

               /* For these vector ones, can't fold all cases, but at
                  least do the most obvious one.  Could do better here
                  using summarise/desummarise of vector constants, but
                  too difficult to verify; hence just handle the zero
                  cases. */
               case Iop_32UtoV128: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (0 == u32) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128to64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 0) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128HIto64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 8) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_64UtoV128: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (0 == u64) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               /* Even stupider (although still correct ..) */
               case Iop_V256to64_0: case Iop_V256to64_1:
               case Iop_V256to64_2: case Iop_V256to64_3: {
                  UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
                  if (v256 == 0x00000000) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               case Iop_ZeroHI64ofV128: {
                  /* Could do better here -- only need to look at the
                     bottom 64 bits of the argument, really. */
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (v128 == 0x0000) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               default:
                  goto unhandled;
            }
         }
         break;

      case Iex_Binop:
         /* BINARY ops */
         if (e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg2->tag == Iex_Const) {
            /* cases where both args are consts */
            switch (e->Iex.Binop.op) {

               /* -- Or -- */
               case Iop_Or8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Or16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Or32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Or64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_OrV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Xor -- */
               case Iop_Xor8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Xor16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Xor32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Xor64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_XorV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- And -- */
               case Iop_And8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_And16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_And32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_And64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_AndV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;
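
               /* For example (illustrative only): by the cases above,
                  Or32(0x0F0F:I32, 0x00FF:I32) folds to 0x0FFF:I32 and
                  And8(0xF0:I8, 0x0F:I8) folds to 0x0:I8. */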

               /* -- Add -- */
               case Iop_Add8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Add32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Add64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Sub -- */
               case Iop_Sub8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Sub32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Sub64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Max32U -- */
               case Iop_Max32U: {
                  UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  UInt res  = u32a > u32b ? u32a : u32b;
                  e2 = IRExpr_Const(IRConst_U32(res));
                  break;
               }

               /* -- Mul -- */
               case Iop_Mul32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Mul64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               case Iop_MullS32: {
                  /* very paranoid */
                  UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  Int   s32a = (Int)u32a;
                  Int   s32b = (Int)u32b;
                  Long  s64a = (Long)s32a;
                  Long  s64b = (Long)s32b;
                  Long  sres = s64a * s64b;
                  ULong ures = (ULong)sres;
                  e2 = IRExpr_Const(IRConst_U64(ures));
                  break;
               }

               /* -- Shl -- */
               case Iop_Shl32:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31)
                     e2 = IRExpr_Const(IRConst_U32(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                              << shift)));
                  break;
               case Iop_Shl64:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63)
                     e2 = IRExpr_Const(IRConst_U64(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                              << shift)));
                  break;

               /* -- Sar -- */
               case Iop_Sar32: {
                  /* paranoid ... */
                  /*signed*/ Int s32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31) {
                     s32 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  }
                  break;
               }
               case Iop_Sar64: {
                  /* paranoid ... */
                  /*signed*/ Long s64;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63) {
                     s64 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  }
                  break;
               }
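
               /* For example (illustrative only):
                  Sar32(0x80000000:I32, 0x4:I8) folds to 0xF8000000:I32,
                  since the shift is performed on a signed Int and so
                  replicates the sign bit.  Out-of-range shift amounts
                  are deliberately left unfolded. */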
*/ 1913 /*unsigned*/ ULong u64; 1914 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1915 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64); 1916 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1917 if (shift >= 0 && shift <= 63) { 1918 u64 >>=/*unsigned*/ shift; 1919 e2 = IRExpr_Const(IRConst_U64(u64)); 1920 } 1921 break; 1922 } 1923 1924 /* -- CmpEQ -- */ 1925 case Iop_CmpEQ32: 1926 e2 = IRExpr_Const(IRConst_U1(toBool( 1927 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1928 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1929 break; 1930 case Iop_CmpEQ64: 1931 e2 = IRExpr_Const(IRConst_U1(toBool( 1932 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1933 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1934 break; 1935 1936 /* -- CmpNE -- */ 1937 case Iop_CmpNE8: 1938 case Iop_CasCmpNE8: 1939 case Iop_ExpCmpNE8: 1940 e2 = IRExpr_Const(IRConst_U1(toBool( 1941 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8) 1942 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))))); 1943 break; 1944 case Iop_CmpNE32: 1945 case Iop_CasCmpNE32: 1946 case Iop_ExpCmpNE32: 1947 e2 = IRExpr_Const(IRConst_U1(toBool( 1948 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1949 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1950 break; 1951 case Iop_CmpNE64: 1952 case Iop_CasCmpNE64: 1953 case Iop_ExpCmpNE64: 1954 e2 = IRExpr_Const(IRConst_U1(toBool( 1955 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1956 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1957 break; 1958 1959 /* -- CmpLEU -- */ 1960 case Iop_CmpLE32U: 1961 e2 = IRExpr_Const(IRConst_U1(toBool( 1962 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1963 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1964 break; 1965 case Iop_CmpLE64U: 1966 e2 = IRExpr_Const(IRConst_U1(toBool( 1967 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1968 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1969 break; 1970 1971 /* -- CmpLES -- */ 1972 case Iop_CmpLE32S: 1973 e2 = IRExpr_Const(IRConst_U1(toBool( 1974 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1975 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1976 break; 1977 case Iop_CmpLE64S: 1978 e2 = IRExpr_Const(IRConst_U1(toBool( 1979 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1980 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1981 break; 1982 1983 /* -- CmpLTS -- */ 1984 case Iop_CmpLT32S: 1985 e2 = IRExpr_Const(IRConst_U1(toBool( 1986 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1987 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1988 break; 1989 case Iop_CmpLT64S: 1990 e2 = IRExpr_Const(IRConst_U1(toBool( 1991 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1992 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1993 break; 1994 1995 /* -- CmpLTU -- */ 1996 case Iop_CmpLT32U: 1997 e2 = IRExpr_Const(IRConst_U1(toBool( 1998 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1999 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 2000 break; 2001 case Iop_CmpLT64U: 2002 e2 = IRExpr_Const(IRConst_U1(toBool( 2003 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 2004 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 2005 break; 2006 2007 /* -- CmpORD -- */ 2008 case Iop_CmpORD32S: { 2009 /* very paranoid */ 2010 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 2011 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 2012 Int s32a = (Int)u32a; 2013 Int s32b = (Int)u32b; 2014 Int r = 0x2; /* EQ */ 2015 if (s32a < s32b) { 2016 r = 0x8; /* LT */ 2017 } 2018 else if (s32a > s32b) { 2019 r = 0x4; /* 
GT */
            }
            e2 = IRExpr_Const(IRConst_U32(r));
            break;
         }

         /* -- nHLto2n -- */
         case Iop_32HLto64:
            e2 = IRExpr_Const(IRConst_U64(
                    (((ULong)(e->Iex.Binop.arg1
                                 ->Iex.Const.con->Ico.U32)) << 32)
                    | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                 ));
            break;
         case Iop_64HLto128:
            /* We can't fold this, because there is no way to
               express the result in IR, but at least pretend to
               handle it, so as to stop getting blasted with
               no-rule-for-this-primop messages. */
            break;
         /* For this vector one, can't fold all cases, but at
            least do the most obvious one.  Could do better here
            using summarise/desummarise of vector constants, but
            too difficult to verify; hence just handle the zero
            cases. */
         case Iop_64HLtoV128: {
            ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
            ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
            if (0 == argHi && 0 == argLo) {
               e2 = IRExpr_Const(IRConst_V128(0));
            } else {
               goto unhandled;
            }
            break;
         }
         /* Same reasoning for the 256-bit version. */
         case Iop_V128HLtoV256: {
            IRExpr* argHi = e->Iex.Binop.arg1;
            IRExpr* argLo = e->Iex.Binop.arg2;
            if (isZeroV128(argHi) && isZeroV128(argLo)) {
               e2 = IRExpr_Const(IRConst_V256(0));
            } else {
               goto unhandled;
            }
            break;
         }

         /* -- V128 stuff -- */
         case Iop_InterleaveLO8x16: {
            /* This turns up a lot in Memcheck instrumentation of
               Icc generated code.  I don't know why. */
            UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
            UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
            if (0 == arg1 && 0 == arg2) {
               e2 = IRExpr_Const(IRConst_V128(0));
            } else {
               goto unhandled;
            }
            break;
         }

         default:
            goto unhandled;
      }

   } else {

      /* other cases (identities, etc) */
      switch (e->Iex.Binop.op) {

         case Iop_Shl32:
         case Iop_Shl64:
         case Iop_Shr64:
         case Iop_Sar64:
            /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
            if (isZeroU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
            if (isZeroU(e->Iex.Binop.arg1)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            break;

         case Iop_Sar32:
         case Iop_Shr32:
            /* Shr32/Sar32(x,0) ==> x */
            if (isZeroU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            break;

         case Iop_Or8:
         case Iop_Or16:
         case Iop_Or32:
         case Iop_Or64:
         case Iop_Max32U:
            /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
            if (isZeroU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
            if (isZeroU(e->Iex.Binop.arg1)) {
               e2 = e->Iex.Binop.arg2;
               break;
            }
            /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
            /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
            if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
               e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
               break;
            }
            /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            break;

         case Iop_Add8:
            /* Add8(t,t) ==> t << 1.
               Memcheck doesn't understand that
               x+x produces a defined least significant bit, and it seems
               simplest just to get rid of the problem by rewriting it
               out, since the opportunity to do so exists. */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
                                 IRExpr_Const(IRConst_U8(1)));
               break;
            }
            break;

         /* NB no Add16(t,t) case yet as no known test case exists */

         case Iop_Add32:
         case Iop_Add64:
            /* Add32/Add64(x,0) ==> x */
            if (isZeroU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* Add32/Add64(0,x) ==> x */
            if (isZeroU(e->Iex.Binop.arg1)) {
               e2 = e->Iex.Binop.arg2;
               break;
            }
            /* Add32/Add64(t,t) ==> t << 1.  Same rationale as for Add8. */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = IRExpr_Binop(
                       e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
                       e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
               break;
            }
            break;

         case Iop_Sub32:
         case Iop_Sub64:
            /* Sub32/Sub64(x,0) ==> x */
            if (isZeroU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            }
            break;
         case Iop_Sub8x16:
            /* Sub8x16(x,0) ==> x */
            if (isZeroV128(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            break;

         case Iop_And8:
         case Iop_And16:
         case Iop_And32:
         case Iop_And64:
            /* And8/And16/And32/And64(x,1---1b) ==> x */
            if (isOnesU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* And8/And16/And32/And64(1---1b,x) ==> x */
            if (isOnesU(e->Iex.Binop.arg1)) {
               e2 = e->Iex.Binop.arg2;
               break;
            }
            /* And8/And16/And32/And64(x,0) ==> 0 */
            if (isZeroU(e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg2;
               break;
            }
            /* And8/And16/And32/And64(0,x) ==> 0 */
            if (isZeroU(e->Iex.Binop.arg1)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            break;

         case Iop_AndV128:
         case Iop_AndV256:
            /* AndV128/AndV256(t,t) ==> t, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* Deal with either arg zero.  Could handle other And
               cases here too.
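               (One illustrative further case: AndV128(x, 1...1:V128)
               ==> x, mirroring the scalar all-ones rules above.)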
*/
            if (e->Iex.Binop.op == Iop_AndV256
                && (isZeroV256(e->Iex.Binop.arg1)
                    || isZeroV256(e->Iex.Binop.arg2))) {
               e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            } else if (e->Iex.Binop.op == Iop_AndV128
                       && (isZeroV128(e->Iex.Binop.arg1)
                           || isZeroV128(e->Iex.Binop.arg2))) {
               e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            }
            break;

         case Iop_OrV128:
         case Iop_OrV256:
            /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = e->Iex.Binop.arg1;
               break;
            }
            /* OrV128(t,0) ==> t */
            if (e->Iex.Binop.op == Iop_OrV128) {
               if (isZeroV128(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               if (isZeroV128(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
            }
            /* OrV256(t,0) ==> t */
            if (e->Iex.Binop.op == Iop_OrV256) {
               if (isZeroV256(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               //Disabled because there's no known test case right now.
               //if (isZeroV256(e->Iex.Binop.arg1)) {
               //   e2 = e->Iex.Binop.arg2;
               //   break;
               //}
            }
            break;

         case Iop_Xor8:
         case Iop_Xor16:
         case Iop_Xor32:
         case Iop_Xor64:
         case Iop_XorV128:
         case Iop_XorV256:
            /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            }
            /* XorV128(t,0) ==> t */
            if (e->Iex.Binop.op == Iop_XorV128) {
               if (isZeroV128(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               //Disabled because there's no known test case right now.
               //if (isZeroV128(e->Iex.Binop.arg1)) {
               //   e2 = e->Iex.Binop.arg2;
               //   break;
               //}
            } else {
               /* Xor8/16/32/64(0,t) ==> t */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* Xor8/16/32/64(t,0) ==> t */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
            }
            break;

         case Iop_CmpNE32:
            /* CmpNE32(t,t) ==> 0, for some IRTemp t */
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            }
            /* CmpNE32(1Uto32(b), 0) ==> b */
            if (isZeroU32(e->Iex.Binop.arg2)) {
               IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
               if (a1 && a1->tag == Iex_Unop
                      && a1->Iex.Unop.op == Iop_1Uto32) {
                  e2 = a1->Iex.Unop.arg;
                  break;
               }
            }
            break;

         case Iop_CmpEQ32:
         case Iop_CmpEQ64:
         case Iop_CmpEQ8x8:
         case Iop_CmpEQ8x16:
         case Iop_CmpEQ16x8:
         case Iop_CmpEQ32x4:
            if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
               e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
               break;
            }
            break;

         default:
            break;
      }
   }
   break;

   case Iex_ITE:
      /* ITE */
      /* is the discriminant a constant? */
      if (e->Iex.ITE.cond->tag == Iex_Const) {
         /* assured by the IR type rules */
         vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
         e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
                 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
      }
      else
      /* are the arms identical?
(pretty weedy test) */
      if (sameIRExprs(env, e->Iex.ITE.iftrue,
                      e->Iex.ITE.iffalse)) {
         e2 = e->Iex.ITE.iffalse;
      }
      break;

   default:
      /* not considered */
      break;
   }

   /* Show cases where we've found but not folded 'op(t,t)'.  Be
      careful not to call sameIRExprs with values of different types,
      though, else it will assert (and so it should!).  We can't
      conveniently call typeOfIRExpr on the two args without a whole
      bunch of extra plumbing to pass in a type env, so just use a
      hacky test to check that the arguments are not anything that
      might cause sameIRExprs to assert.  This is only OK because this
      kludge is only used for debug printing, not for "real"
      operation.  For "real" operation (ie, all other calls to
      sameIRExprs), it is essential that the two args have the same
      type.

      The "right" solution is to plumb the containing block's
      IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
      that's a bunch of extra parameter passing which will just slow
      down the normal case, for no purpose. */
   if (vex_control.iropt_verbosity > 0
       && e == e2
       && e->tag == Iex_Binop
       && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
                                                    e->Iex.Binop.arg2)
       && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
      vex_printf("vex iropt: fold_Expr: no ident rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }

   /* Show the overall results of folding. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf(" -> ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

 unhandled:
#  if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
#  else
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no const rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
#  endif
}


/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass.
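   For example (illustrative): if env maps t5 to 0x10:I32, then
   Add32(t5,t7) is rewritten as Add32(0x10:I32,t7), while t7, being
   unbound in env, is left as-is.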
*/ 2431 2432static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) 2433{ 2434 switch (ex->tag) { 2435 case Iex_RdTmp: 2436 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) { 2437 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp]; 2438 if (rhs->tag == Iex_RdTmp) 2439 return rhs; 2440 if (rhs->tag == Iex_Const 2441 && rhs->Iex.Const.con->tag != Ico_F64i) 2442 return rhs; 2443 } 2444 /* not bound in env */ 2445 return ex; 2446 2447 case Iex_Const: 2448 case Iex_Get: 2449 return ex; 2450 2451 case Iex_GetI: 2452 vassert(isIRAtom(ex->Iex.GetI.ix)); 2453 return IRExpr_GetI( 2454 ex->Iex.GetI.descr, 2455 subst_Expr(env, ex->Iex.GetI.ix), 2456 ex->Iex.GetI.bias 2457 ); 2458 2459 case Iex_Qop: { 2460 IRQop* qop = ex->Iex.Qop.details; 2461 vassert(isIRAtom(qop->arg1)); 2462 vassert(isIRAtom(qop->arg2)); 2463 vassert(isIRAtom(qop->arg3)); 2464 vassert(isIRAtom(qop->arg4)); 2465 return IRExpr_Qop( 2466 qop->op, 2467 subst_Expr(env, qop->arg1), 2468 subst_Expr(env, qop->arg2), 2469 subst_Expr(env, qop->arg3), 2470 subst_Expr(env, qop->arg4) 2471 ); 2472 } 2473 2474 case Iex_Triop: { 2475 IRTriop* triop = ex->Iex.Triop.details; 2476 vassert(isIRAtom(triop->arg1)); 2477 vassert(isIRAtom(triop->arg2)); 2478 vassert(isIRAtom(triop->arg3)); 2479 return IRExpr_Triop( 2480 triop->op, 2481 subst_Expr(env, triop->arg1), 2482 subst_Expr(env, triop->arg2), 2483 subst_Expr(env, triop->arg3) 2484 ); 2485 } 2486 2487 case Iex_Binop: 2488 vassert(isIRAtom(ex->Iex.Binop.arg1)); 2489 vassert(isIRAtom(ex->Iex.Binop.arg2)); 2490 return IRExpr_Binop( 2491 ex->Iex.Binop.op, 2492 subst_Expr(env, ex->Iex.Binop.arg1), 2493 subst_Expr(env, ex->Iex.Binop.arg2) 2494 ); 2495 2496 case Iex_Unop: 2497 vassert(isIRAtom(ex->Iex.Unop.arg)); 2498 return IRExpr_Unop( 2499 ex->Iex.Unop.op, 2500 subst_Expr(env, ex->Iex.Unop.arg) 2501 ); 2502 2503 case Iex_Load: 2504 vassert(isIRAtom(ex->Iex.Load.addr)); 2505 return IRExpr_Load( 2506 ex->Iex.Load.end, 2507 ex->Iex.Load.ty, 2508 subst_Expr(env, ex->Iex.Load.addr) 2509 ); 2510 2511 case Iex_CCall: { 2512 Int i; 2513 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args); 2514 for (i = 0; args2[i]; i++) { 2515 vassert(isIRAtom(args2[i])); 2516 args2[i] = subst_Expr(env, args2[i]); 2517 } 2518 return IRExpr_CCall( 2519 ex->Iex.CCall.cee, 2520 ex->Iex.CCall.retty, 2521 args2 2522 ); 2523 } 2524 2525 case Iex_ITE: 2526 vassert(isIRAtom(ex->Iex.ITE.cond)); 2527 vassert(isIRAtom(ex->Iex.ITE.iftrue)); 2528 vassert(isIRAtom(ex->Iex.ITE.iffalse)); 2529 return IRExpr_ITE( 2530 subst_Expr(env, ex->Iex.ITE.cond), 2531 subst_Expr(env, ex->Iex.ITE.iftrue), 2532 subst_Expr(env, ex->Iex.ITE.iffalse) 2533 ); 2534 2535 default: 2536 vex_printf("\n\n"); ppIRExpr(ex); 2537 vpanic("subst_Expr"); 2538 2539 } 2540} 2541 2542 2543/* Apply the subst to stmt, then fold the result as much as possible. 2544 Much simplified due to stmt being previously flattened. As a 2545 result of this, the stmt may wind up being turned into a no-op. 
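   For instance, an Exit whose guard folds to 0:I1 becomes
   IRStmt_NoOp, and a StoreG whose guard folds to 1:I1 is replaced
   by an ordinary Store (see the relevant cases below).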
2546*/ 2547static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) 2548{ 2549# if 0 2550 vex_printf("\nsubst and fold stmt\n"); 2551 ppIRStmt(st); 2552 vex_printf("\n"); 2553# endif 2554 2555 switch (st->tag) { 2556 case Ist_AbiHint: 2557 vassert(isIRAtom(st->Ist.AbiHint.base)); 2558 vassert(isIRAtom(st->Ist.AbiHint.nia)); 2559 return IRStmt_AbiHint( 2560 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)), 2561 st->Ist.AbiHint.len, 2562 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia)) 2563 ); 2564 case Ist_Put: 2565 vassert(isIRAtom(st->Ist.Put.data)); 2566 return IRStmt_Put( 2567 st->Ist.Put.offset, 2568 fold_Expr(env, subst_Expr(env, st->Ist.Put.data)) 2569 ); 2570 2571 case Ist_PutI: { 2572 IRPutI *puti, *puti2; 2573 puti = st->Ist.PutI.details; 2574 vassert(isIRAtom(puti->ix)); 2575 vassert(isIRAtom(puti->data)); 2576 puti2 = mkIRPutI(puti->descr, 2577 fold_Expr(env, subst_Expr(env, puti->ix)), 2578 puti->bias, 2579 fold_Expr(env, subst_Expr(env, puti->data))); 2580 return IRStmt_PutI(puti2); 2581 } 2582 2583 case Ist_WrTmp: 2584 /* This is the one place where an expr (st->Ist.WrTmp.data) is 2585 allowed to be more than just a constant or a tmp. */ 2586 return IRStmt_WrTmp( 2587 st->Ist.WrTmp.tmp, 2588 fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data)) 2589 ); 2590 2591 case Ist_Store: 2592 vassert(isIRAtom(st->Ist.Store.addr)); 2593 vassert(isIRAtom(st->Ist.Store.data)); 2594 return IRStmt_Store( 2595 st->Ist.Store.end, 2596 fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)), 2597 fold_Expr(env, subst_Expr(env, st->Ist.Store.data)) 2598 ); 2599 2600 case Ist_StoreG: { 2601 IRStoreG* sg = st->Ist.StoreG.details; 2602 vassert(isIRAtom(sg->addr)); 2603 vassert(isIRAtom(sg->data)); 2604 vassert(isIRAtom(sg->guard)); 2605 IRExpr* faddr = fold_Expr(env, subst_Expr(env, sg->addr)); 2606 IRExpr* fdata = fold_Expr(env, subst_Expr(env, sg->data)); 2607 IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard)); 2608 if (fguard->tag == Iex_Const) { 2609 /* The condition on this store has folded down to a constant. */ 2610 vassert(fguard->Iex.Const.con->tag == Ico_U1); 2611 if (fguard->Iex.Const.con->Ico.U1 == False) { 2612 return IRStmt_NoOp(); 2613 } else { 2614 vassert(fguard->Iex.Const.con->Ico.U1 == True); 2615 return IRStmt_Store(sg->end, faddr, fdata); 2616 } 2617 } 2618 return IRStmt_StoreG(sg->end, faddr, fdata, fguard); 2619 } 2620 2621 case Ist_LoadG: { 2622 /* This is complicated. If the guard folds down to 'false', 2623 we can replace it with an assignment 'dst := alt', but if 2624 the guard folds down to 'true', we can't conveniently 2625 replace it with an unconditional load, because doing so 2626 requires generating a new temporary, and that is not easy 2627 to do at this point. */ 2628 IRLoadG* lg = st->Ist.LoadG.details; 2629 vassert(isIRAtom(lg->addr)); 2630 vassert(isIRAtom(lg->alt)); 2631 vassert(isIRAtom(lg->guard)); 2632 IRExpr* faddr = fold_Expr(env, subst_Expr(env, lg->addr)); 2633 IRExpr* falt = fold_Expr(env, subst_Expr(env, lg->alt)); 2634 IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard)); 2635 if (fguard->tag == Iex_Const) { 2636 /* The condition on this load has folded down to a constant. */ 2637 vassert(fguard->Iex.Const.con->tag == Ico_U1); 2638 if (fguard->Iex.Const.con->Ico.U1 == False) { 2639 /* The load is not going to happen -- instead 'alt' is 2640 assigned to 'dst'. */ 2641 return IRStmt_WrTmp(lg->dst, falt); 2642 } else { 2643 vassert(fguard->Iex.Const.con->Ico.U1 == True); 2644 /* The load is always going to happen. 
We want to 2645 convert to an unconditional load and assign to 'dst' 2646 (IRStmt_WrTmp). Problem is we need an extra temp to 2647 hold the loaded value, but none is available. 2648 Instead, reconstitute the conditional load (with 2649 folded args, of course) and let the caller of this 2650 routine deal with the problem. */ 2651 } 2652 } 2653 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard); 2654 } 2655 2656 case Ist_CAS: { 2657 IRCAS *cas, *cas2; 2658 cas = st->Ist.CAS.details; 2659 vassert(isIRAtom(cas->addr)); 2660 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 2661 vassert(isIRAtom(cas->expdLo)); 2662 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 2663 vassert(isIRAtom(cas->dataLo)); 2664 cas2 = mkIRCAS( 2665 cas->oldHi, cas->oldLo, cas->end, 2666 fold_Expr(env, subst_Expr(env, cas->addr)), 2667 cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi)) 2668 : NULL, 2669 fold_Expr(env, subst_Expr(env, cas->expdLo)), 2670 cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi)) 2671 : NULL, 2672 fold_Expr(env, subst_Expr(env, cas->dataLo)) 2673 ); 2674 return IRStmt_CAS(cas2); 2675 } 2676 2677 case Ist_LLSC: 2678 vassert(isIRAtom(st->Ist.LLSC.addr)); 2679 if (st->Ist.LLSC.storedata) 2680 vassert(isIRAtom(st->Ist.LLSC.storedata)); 2681 return IRStmt_LLSC( 2682 st->Ist.LLSC.end, 2683 st->Ist.LLSC.result, 2684 fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)), 2685 st->Ist.LLSC.storedata 2686 ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata)) 2687 : NULL 2688 ); 2689 2690 case Ist_Dirty: { 2691 Int i; 2692 IRDirty *d, *d2; 2693 d = st->Ist.Dirty.details; 2694 d2 = emptyIRDirty(); 2695 *d2 = *d; 2696 d2->args = shallowCopyIRExprVec(d2->args); 2697 if (d2->mFx != Ifx_None) { 2698 vassert(isIRAtom(d2->mAddr)); 2699 d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr)); 2700 } 2701 vassert(isIRAtom(d2->guard)); 2702 d2->guard = fold_Expr(env, subst_Expr(env, d2->guard)); 2703 for (i = 0; d2->args[i]; i++) { 2704 IRExpr* arg = d2->args[i]; 2705 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) { 2706 vassert(isIRAtom(arg)); 2707 d2->args[i] = fold_Expr(env, subst_Expr(env, arg)); 2708 } 2709 } 2710 return IRStmt_Dirty(d2); 2711 } 2712 2713 case Ist_IMark: 2714 return IRStmt_IMark(st->Ist.IMark.addr, 2715 st->Ist.IMark.len, 2716 st->Ist.IMark.delta); 2717 2718 case Ist_NoOp: 2719 return IRStmt_NoOp(); 2720 2721 case Ist_MBE: 2722 return IRStmt_MBE(st->Ist.MBE.event); 2723 2724 case Ist_Exit: { 2725 IRExpr* fcond; 2726 vassert(isIRAtom(st->Ist.Exit.guard)); 2727 fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard)); 2728 if (fcond->tag == Iex_Const) { 2729 /* Interesting. The condition on this exit has folded down to 2730 a constant. */ 2731 vassert(fcond->Iex.Const.con->tag == Ico_U1); 2732 if (fcond->Iex.Const.con->Ico.U1 == False) { 2733 /* exit is never going to happen, so dump the statement. */ 2734 return IRStmt_NoOp(); 2735 } else { 2736 vassert(fcond->Iex.Const.con->Ico.U1 == True); 2737 /* Hmmm. The exit has become unconditional. Leave it 2738 as it is for now, since we'd have to truncate the BB 2739 at this point, which is tricky. Such truncation is 2740 done later by the dead-code elimination pass. */ 2741 /* fall out into the reconstruct-the-exit code. 
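            (This can happen when, for example, the exit guard was
            CmpEQ32(t,t), which the identity rules above fold to
            1:I1.)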
*/
            if (vex_control.iropt_verbosity > 0)
               /* really a misuse of vex_control.iropt_verbosity */
               vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
         }
      }
      return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                         st->Ist.Exit.dst, st->Ist.Exit.offsIP);
   }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}


IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs.  The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  The subst_and_fold_Stmt
         call then also folds any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp, add it to the
            running environment.  This is for the benefit of copy
            propagation and to allow sameIRExprs to look through
            IRTemps. */
         case Ist_WrTmp: {
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it.
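                  Illustrative example: for
                  't = if (1:I1) 16Uto32(LDle:I16(a)) else alt', the
                  fixup pass below rewrites the NoOp/LoadG pair as
                  'tNew = LDle:I16(a)' followed by 't = 16Uto32(tNew)',
                  where tNew is a freshly allocated temp.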
*/
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

         default:
            break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}


/*---------------------------------------------------------------*/
/*--- Dead code (t = E) removal                               ---*/
/*---------------------------------------------------------------*/

/* As a side effect, also removes all code following an unconditional
   side exit. */

/* The type of the HashHW map is: a map from IRTemp to nothing --
   really just representing a set of IRTemps.
2930*/ 2931 2932inline 2933static void addUses_Temp ( Bool* set, IRTemp tmp ) 2934{ 2935 set[(Int)tmp] = True; 2936} 2937 2938static void addUses_Expr ( Bool* set, IRExpr* e ) 2939{ 2940 Int i; 2941 switch (e->tag) { 2942 case Iex_GetI: 2943 addUses_Expr(set, e->Iex.GetI.ix); 2944 return; 2945 case Iex_ITE: 2946 addUses_Expr(set, e->Iex.ITE.cond); 2947 addUses_Expr(set, e->Iex.ITE.iftrue); 2948 addUses_Expr(set, e->Iex.ITE.iffalse); 2949 return; 2950 case Iex_CCall: 2951 for (i = 0; e->Iex.CCall.args[i]; i++) 2952 addUses_Expr(set, e->Iex.CCall.args[i]); 2953 return; 2954 case Iex_Load: 2955 addUses_Expr(set, e->Iex.Load.addr); 2956 return; 2957 case Iex_Qop: 2958 addUses_Expr(set, e->Iex.Qop.details->arg1); 2959 addUses_Expr(set, e->Iex.Qop.details->arg2); 2960 addUses_Expr(set, e->Iex.Qop.details->arg3); 2961 addUses_Expr(set, e->Iex.Qop.details->arg4); 2962 return; 2963 case Iex_Triop: 2964 addUses_Expr(set, e->Iex.Triop.details->arg1); 2965 addUses_Expr(set, e->Iex.Triop.details->arg2); 2966 addUses_Expr(set, e->Iex.Triop.details->arg3); 2967 return; 2968 case Iex_Binop: 2969 addUses_Expr(set, e->Iex.Binop.arg1); 2970 addUses_Expr(set, e->Iex.Binop.arg2); 2971 return; 2972 case Iex_Unop: 2973 addUses_Expr(set, e->Iex.Unop.arg); 2974 return; 2975 case Iex_RdTmp: 2976 addUses_Temp(set, e->Iex.RdTmp.tmp); 2977 return; 2978 case Iex_Const: 2979 case Iex_Get: 2980 return; 2981 default: 2982 vex_printf("\n"); 2983 ppIRExpr(e); 2984 vpanic("addUses_Expr"); 2985 } 2986} 2987 2988static void addUses_Stmt ( Bool* set, IRStmt* st ) 2989{ 2990 Int i; 2991 IRDirty* d; 2992 IRCAS* cas; 2993 switch (st->tag) { 2994 case Ist_AbiHint: 2995 addUses_Expr(set, st->Ist.AbiHint.base); 2996 addUses_Expr(set, st->Ist.AbiHint.nia); 2997 return; 2998 case Ist_PutI: 2999 addUses_Expr(set, st->Ist.PutI.details->ix); 3000 addUses_Expr(set, st->Ist.PutI.details->data); 3001 return; 3002 case Ist_WrTmp: 3003 addUses_Expr(set, st->Ist.WrTmp.data); 3004 return; 3005 case Ist_Put: 3006 addUses_Expr(set, st->Ist.Put.data); 3007 return; 3008 case Ist_Store: 3009 addUses_Expr(set, st->Ist.Store.addr); 3010 addUses_Expr(set, st->Ist.Store.data); 3011 return; 3012 case Ist_StoreG: { 3013 IRStoreG* sg = st->Ist.StoreG.details; 3014 addUses_Expr(set, sg->addr); 3015 addUses_Expr(set, sg->data); 3016 addUses_Expr(set, sg->guard); 3017 return; 3018 } 3019 case Ist_LoadG: { 3020 IRLoadG* lg = st->Ist.LoadG.details; 3021 addUses_Expr(set, lg->addr); 3022 addUses_Expr(set, lg->alt); 3023 addUses_Expr(set, lg->guard); 3024 return; 3025 } 3026 case Ist_CAS: 3027 cas = st->Ist.CAS.details; 3028 addUses_Expr(set, cas->addr); 3029 if (cas->expdHi) 3030 addUses_Expr(set, cas->expdHi); 3031 addUses_Expr(set, cas->expdLo); 3032 if (cas->dataHi) 3033 addUses_Expr(set, cas->dataHi); 3034 addUses_Expr(set, cas->dataLo); 3035 return; 3036 case Ist_LLSC: 3037 addUses_Expr(set, st->Ist.LLSC.addr); 3038 if (st->Ist.LLSC.storedata) 3039 addUses_Expr(set, st->Ist.LLSC.storedata); 3040 return; 3041 case Ist_Dirty: 3042 d = st->Ist.Dirty.details; 3043 if (d->mFx != Ifx_None) 3044 addUses_Expr(set, d->mAddr); 3045 addUses_Expr(set, d->guard); 3046 for (i = 0; d->args[i] != NULL; i++) { 3047 IRExpr* arg = d->args[i]; 3048 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) 3049 addUses_Expr(set, arg); 3050 } 3051 return; 3052 case Ist_NoOp: 3053 case Ist_IMark: 3054 case Ist_MBE: 3055 return; 3056 case Ist_Exit: 3057 addUses_Expr(set, st->Ist.Exit.guard); 3058 return; 3059 default: 3060 vex_printf("\n"); 3061 ppIRStmt(st); 3062 vpanic("addUses_Stmt"); 3063 } 
3064} 3065 3066 3067/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */ 3068static Bool isZeroU1 ( IRExpr* e ) 3069{ 3070 return toBool( e->tag == Iex_Const 3071 && e->Iex.Const.con->tag == Ico_U1 3072 && e->Iex.Const.con->Ico.U1 == False ); 3073} 3074 3075/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */ 3076static Bool isOneU1 ( IRExpr* e ) 3077{ 3078 return toBool( e->tag == Iex_Const 3079 && e->Iex.Const.con->tag == Ico_U1 3080 && e->Iex.Const.con->Ico.U1 == True ); 3081} 3082 3083 3084/* Note, this destructively modifies the given IRSB. */ 3085 3086/* Scan backwards through statements, carrying a set of IRTemps which 3087 are known to be used after the current point. On encountering 't = 3088 E', delete the binding if it is not used. Otherwise, add any temp 3089 uses to the set and keep on moving backwards. 3090 3091 As an enhancement, the first (backwards) pass searches for IR exits 3092 with always-taken conditions and notes the location of the earliest 3093 one in the block. If any such are found, a second pass copies the 3094 exit destination and jump kind to the bb-end. Then, the exit and 3095 all statements following it are turned into no-ops. 3096*/ 3097 3098/* notstatic */ void do_deadcode_BB ( IRSB* bb ) 3099{ 3100 Int i, i_unconditional_exit; 3101 Int n_tmps = bb->tyenv->types_used; 3102 Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool)); 3103 IRStmt* st; 3104 3105 for (i = 0; i < n_tmps; i++) 3106 set[i] = False; 3107 3108 /* start off by recording IRTemp uses in the next field. */ 3109 addUses_Expr(set, bb->next); 3110 3111 /* First pass */ 3112 3113 /* Work backwards through the stmts */ 3114 i_unconditional_exit = -1; 3115 for (i = bb->stmts_used-1; i >= 0; i--) { 3116 st = bb->stmts[i]; 3117 if (st->tag == Ist_NoOp) 3118 continue; 3119 /* take note of any unconditional exits */ 3120 if (st->tag == Ist_Exit 3121 && isOneU1(st->Ist.Exit.guard)) 3122 i_unconditional_exit = i; 3123 if (st->tag == Ist_WrTmp 3124 && set[(Int)(st->Ist.WrTmp.tmp)] == False) { 3125 /* it's an IRTemp which never got used. Delete it. */ 3126 if (DEBUG_IROPT) { 3127 vex_printf("DEAD: "); 3128 ppIRStmt(st); 3129 vex_printf("\n"); 3130 } 3131 bb->stmts[i] = IRStmt_NoOp(); 3132 } 3133 else 3134 if (st->tag == Ist_Dirty 3135 && st->Ist.Dirty.details->guard 3136 && isZeroU1(st->Ist.Dirty.details->guard)) { 3137 /* This is a dirty helper which will never get called. 3138 Delete it. */ 3139 bb->stmts[i] = IRStmt_NoOp(); 3140 } 3141 else { 3142 /* Note any IRTemp uses made by the current statement. */ 3143 addUses_Stmt(set, st); 3144 } 3145 } 3146 3147 /* Optional second pass: if any unconditional exits were found, 3148 delete them and all following statements. 
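   (Illustration: if stmt 10 is 'if (1:I1) goto {Boring} 0x4000:I32'
   then bb->next and bb->jumpkind are overwritten with that exit's
   target and jump kind, and stmts 10 onwards all become no-ops.)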
*/ 3149 3150 if (i_unconditional_exit != -1) { 3151 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n", 3152 i_unconditional_exit); 3153 vassert(i_unconditional_exit >= 0 3154 && i_unconditional_exit < bb->stmts_used); 3155 bb->next 3156 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); 3157 bb->jumpkind 3158 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; 3159 bb->offsIP 3160 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP; 3161 for (i = i_unconditional_exit; i < bb->stmts_used; i++) 3162 bb->stmts[i] = IRStmt_NoOp(); 3163 } 3164} 3165 3166 3167/*---------------------------------------------------------------*/ 3168/*--- Specialisation of helper function calls, in ---*/ 3169/*--- collaboration with the front end ---*/ 3170/*---------------------------------------------------------------*/ 3171 3172static 3173IRSB* spec_helpers_BB( 3174 IRSB* bb, 3175 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int) 3176 ) 3177{ 3178 Int i; 3179 IRStmt* st; 3180 IRExpr* ex; 3181 Bool any = False; 3182 3183 for (i = bb->stmts_used-1; i >= 0; i--) { 3184 st = bb->stmts[i]; 3185 3186 if (st->tag != Ist_WrTmp 3187 || st->Ist.WrTmp.data->tag != Iex_CCall) 3188 continue; 3189 3190 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name, 3191 st->Ist.WrTmp.data->Iex.CCall.args, 3192 &bb->stmts[0], i ); 3193 if (!ex) 3194 /* the front end can't think of a suitable replacement */ 3195 continue; 3196 3197 /* We got something better. Install it in the bb. */ 3198 any = True; 3199 bb->stmts[i] 3200 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); 3201 3202 if (0) { 3203 vex_printf("SPEC: "); 3204 ppIRExpr(st->Ist.WrTmp.data); 3205 vex_printf(" --> "); 3206 ppIRExpr(ex); 3207 vex_printf("\n"); 3208 } 3209 } 3210 3211 if (any) 3212 bb = flatten_BB(bb); 3213 return bb; 3214} 3215 3216 3217/*---------------------------------------------------------------*/ 3218/*--- Determination of guest state aliasing relationships ---*/ 3219/*---------------------------------------------------------------*/ 3220 3221/* These are helper functions for CSE and GetI/PutI transformations. 3222 3223 Determine, to the extent possible, the relationship between two 3224 guest state accesses. The possible outcomes are: 3225 3226 * Exact alias. These two accesses denote precisely the same 3227 piece of the guest state. 3228 3229 * Definitely no alias. These two accesses are guaranteed not to 3230 overlap any part of the guest state. 3231 3232 * Unknown -- if neither of the above can be established. 3233 3234 If in doubt, return Unknown. */ 3235 3236typedef 3237 enum { ExactAlias, NoAlias, UnknownAlias } 3238 GSAliasing; 3239 3240 3241/* Produces the alias relation between an indexed guest 3242 state access and a non-indexed access. */ 3243 3244static 3245GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1, 3246 Int offset2, IRType ty2 ) 3247{ 3248 UInt minoff1, maxoff1, minoff2, maxoff2; 3249 3250 getArrayBounds( descr1, &minoff1, &maxoff1 ); 3251 minoff2 = offset2; 3252 maxoff2 = minoff2 + sizeofIRType(ty2) - 1; 3253 3254 if (maxoff1 < minoff2 || maxoff2 < minoff1) 3255 return NoAlias; 3256 3257 /* Could probably do better here if required. For the moment 3258 however just claim not to know anything more. */ 3259 return UnknownAlias; 3260} 3261 3262 3263/* Produces the alias relation between two indexed guest state 3264 accesses. 
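   For example (illustrative): given identical descriptors for an
   8-element array and identical index atoms, biases 1 and 9 denote
   the same element, since biases are compared modulo nElems; the
   result is then ExactAlias.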
*/

static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, bias1 and bias2 are normalised into the range
      0 .. descr1->nElems - 1, and so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}


/*---------------------------------------------------------------*/
/*--- Common Subexpression Elimination                        ---*/
/*---------------------------------------------------------------*/

/* Expensive in time and space. */

/* Uses two environments:
   an IRTemp -> IRTemp mapping
   a mapping from AvailExpr* to IRTemp
*/

typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;

static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
{
   if (tc1->tag != tc2->tag)
      return False;
   switch (tc1->tag) {
      case TCc:
         return eqIRConst(tc1->u.con, tc2->u.con);
      case TCt:
         return tc1->u.tmp == tc2->u.tmp;
      default:
         vpanic("eqTmpOrConst");
   }
}

static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
{
   Bool eq = cee1->addr == cee2->addr;
   if (eq) {
      vassert(cee1->regparms == cee2->regparms);
      vassert(cee1->mcx_mask == cee2->mcx_mask);
      /* Names should be the same too, but we don't bother to
         check. */
   }
   return eq;
}

/* Convert an atomic IRExpr* to a TmpOrConst.
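   (Illustrative: a RdTmp of t7 becomes a TCt carrying tmp 7, and
   0x10:I32 becomes a TCc carrying that constant; any non-atom
   argument asserts.)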
*/ 3375static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e ) 3376{ 3377 switch (e->tag) { 3378 case Iex_RdTmp: 3379 tc->tag = TCt; 3380 tc->u.tmp = e->Iex.RdTmp.tmp; 3381 break; 3382 case Iex_Const: 3383 tc->tag = TCc; 3384 tc->u.con = e->Iex.Const.con; 3385 break; 3386 default: 3387 /* Getting here is a serious error. It means that the 3388 presented arg isn't an IR atom, as it should be. */ 3389 vpanic("irExpr_to_TmpOrConst"); 3390 } 3391} 3392 3393/* Convert a TmpOrConst to an atomic IRExpr*. */ 3394static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc ) 3395{ 3396 switch (tc->tag) { 3397 case TCc: return IRExpr_Const(tc->u.con); 3398 case TCt: return IRExpr_RdTmp(tc->u.tmp); 3399 default: vpanic("tmpOrConst_to_IRExpr"); 3400 } 3401} 3402 3403/* Convert a NULL terminated IRExpr* vector to an array of 3404 TmpOrConsts, and a length. */ 3405static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs, 3406 /*OUT*/Int* nOuts, 3407 IRExpr** ins ) 3408{ 3409 Int i, n; 3410 /* We have to make two passes, one to count, one to copy. */ 3411 for (n = 0; ins[n]; n++) 3412 ; 3413 *outs = LibVEX_Alloc_inline(n * sizeof(TmpOrConst)); 3414 *nOuts = n; 3415 /* and now copy .. */ 3416 for (i = 0; i < n; i++) { 3417 IRExpr* arg = ins[i]; 3418 TmpOrConst* dst = &(*outs)[i]; 3419 irExpr_to_TmpOrConst(dst, arg); 3420 } 3421} 3422 3423typedef 3424 struct { 3425 enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt, 3426 CCall, Load 3427 } tag; 3428 union { 3429 /* unop(tmp) */ 3430 struct { 3431 IROp op; 3432 IRTemp arg; 3433 } Ut; 3434 /* binop(tmp,tmp) */ 3435 struct { 3436 IROp op; 3437 IRTemp arg1; 3438 IRTemp arg2; 3439 } Btt; 3440 /* binop(tmp,const) */ 3441 struct { 3442 IROp op; 3443 IRTemp arg1; 3444 IRConst con2; 3445 } Btc; 3446 /* binop(const,tmp) */ 3447 struct { 3448 IROp op; 3449 IRConst con1; 3450 IRTemp arg2; 3451 } Bct; 3452 /* F64i-style const */ 3453 struct { 3454 ULong f64i; 3455 } Cf64i; 3456 /* ITE(tmp,tmp,tmp) */ 3457 struct { 3458 IRTemp co; 3459 IRTemp e1; 3460 IRTemp e0; 3461 } Ittt; 3462 /* ITE(tmp,tmp,const) */ 3463 struct { 3464 IRTemp co; 3465 IRTemp e1; 3466 IRConst con0; 3467 } Ittc; 3468 /* ITE(tmp,const,tmp) */ 3469 struct { 3470 IRTemp co; 3471 IRConst con1; 3472 IRTemp e0; 3473 } Itct; 3474 /* ITE(tmp,const,const) */ 3475 struct { 3476 IRTemp co; 3477 IRConst con1; 3478 IRConst con0; 3479 } Itcc; 3480 /* GetI(descr,tmp,bias)*/ 3481 struct { 3482 IRRegArray* descr; 3483 IRTemp ix; 3484 Int bias; 3485 } GetIt; 3486 /* Clean helper call */ 3487 struct { 3488 IRCallee* cee; 3489 TmpOrConst* args; 3490 Int nArgs; 3491 IRType retty; 3492 } CCall; 3493 /* Load(end,ty,addr) */ 3494 struct { 3495 IREndness end; 3496 IRType ty; 3497 TmpOrConst addr; 3498 } Load; 3499 } u; 3500 } 3501 AvailExpr; 3502 3503static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 ) 3504{ 3505 if (LIKELY(a1->tag != a2->tag)) 3506 return False; 3507 switch (a1->tag) { 3508 case Ut: 3509 return toBool( 3510 a1->u.Ut.op == a2->u.Ut.op 3511 && a1->u.Ut.arg == a2->u.Ut.arg); 3512 case Btt: 3513 return toBool( 3514 a1->u.Btt.op == a2->u.Btt.op 3515 && a1->u.Btt.arg1 == a2->u.Btt.arg1 3516 && a1->u.Btt.arg2 == a2->u.Btt.arg2); 3517 case Btc: 3518 return toBool( 3519 a1->u.Btc.op == a2->u.Btc.op 3520 && a1->u.Btc.arg1 == a2->u.Btc.arg1 3521 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2)); 3522 case Bct: 3523 return toBool( 3524 a1->u.Bct.op == a2->u.Bct.op 3525 && a1->u.Bct.arg2 == a2->u.Bct.arg2 3526 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1)); 3527 case Cf64i: 3528 return 
toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i); 3529 case Ittt: 3530 return toBool(a1->u.Ittt.co == a2->u.Ittt.co 3531 && a1->u.Ittt.e1 == a2->u.Ittt.e1 3532 && a1->u.Ittt.e0 == a2->u.Ittt.e0); 3533 case Ittc: 3534 return toBool(a1->u.Ittc.co == a2->u.Ittc.co 3535 && a1->u.Ittc.e1 == a2->u.Ittc.e1 3536 && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0)); 3537 case Itct: 3538 return toBool(a1->u.Itct.co == a2->u.Itct.co 3539 && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1) 3540 && a1->u.Itct.e0 == a2->u.Itct.e0); 3541 case Itcc: 3542 return toBool(a1->u.Itcc.co == a2->u.Itcc.co 3543 && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1) 3544 && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0)); 3545 case GetIt: 3546 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr) 3547 && a1->u.GetIt.ix == a2->u.GetIt.ix 3548 && a1->u.GetIt.bias == a2->u.GetIt.bias); 3549 case CCall: { 3550 Int i, n; 3551 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs 3552 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee); 3553 if (eq) { 3554 n = a1->u.CCall.nArgs; 3555 for (i = 0; i < n; i++) { 3556 if (!eqTmpOrConst( &a1->u.CCall.args[i], 3557 &a2->u.CCall.args[i] )) { 3558 eq = False; 3559 break; 3560 } 3561 } 3562 } 3563 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty); 3564 return eq; 3565 } 3566 case Load: { 3567 Bool eq = toBool(a1->u.Load.end == a2->u.Load.end 3568 && a1->u.Load.ty == a2->u.Load.ty 3569 && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr)); 3570 return eq; 3571 } 3572 default: 3573 vpanic("eq_AvailExpr"); 3574 } 3575} 3576 3577static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae ) 3578{ 3579 IRConst *con, *con0, *con1; 3580 switch (ae->tag) { 3581 case Ut: 3582 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) ); 3583 case Btt: 3584 return IRExpr_Binop( ae->u.Btt.op, 3585 IRExpr_RdTmp(ae->u.Btt.arg1), 3586 IRExpr_RdTmp(ae->u.Btt.arg2) ); 3587 case Btc: 3588 con = LibVEX_Alloc_inline(sizeof(IRConst)); 3589 *con = ae->u.Btc.con2; 3590 return IRExpr_Binop( ae->u.Btc.op, 3591 IRExpr_RdTmp(ae->u.Btc.arg1), 3592 IRExpr_Const(con) ); 3593 case Bct: 3594 con = LibVEX_Alloc_inline(sizeof(IRConst)); 3595 *con = ae->u.Bct.con1; 3596 return IRExpr_Binop( ae->u.Bct.op, 3597 IRExpr_Const(con), 3598 IRExpr_RdTmp(ae->u.Bct.arg2) ); 3599 case Cf64i: 3600 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i)); 3601 case Ittt: 3602 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co), 3603 IRExpr_RdTmp(ae->u.Ittt.e1), 3604 IRExpr_RdTmp(ae->u.Ittt.e0)); 3605 case Ittc: 3606 con0 = LibVEX_Alloc_inline(sizeof(IRConst)); 3607 *con0 = ae->u.Ittc.con0; 3608 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co), 3609 IRExpr_RdTmp(ae->u.Ittc.e1), 3610 IRExpr_Const(con0)); 3611 case Itct: 3612 con1 = LibVEX_Alloc_inline(sizeof(IRConst)); 3613 *con1 = ae->u.Itct.con1; 3614 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co), 3615 IRExpr_Const(con1), 3616 IRExpr_RdTmp(ae->u.Itct.e0)); 3617 3618 case Itcc: 3619 con0 = LibVEX_Alloc_inline(sizeof(IRConst)); 3620 con1 = LibVEX_Alloc_inline(sizeof(IRConst)); 3621 *con0 = ae->u.Itcc.con0; 3622 *con1 = ae->u.Itcc.con1; 3623 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co), 3624 IRExpr_Const(con1), 3625 IRExpr_Const(con0)); 3626 case GetIt: 3627 return IRExpr_GetI(ae->u.GetIt.descr, 3628 IRExpr_RdTmp(ae->u.GetIt.ix), 3629 ae->u.GetIt.bias); 3630 case CCall: { 3631 Int i, n = ae->u.CCall.nArgs; 3632 vassert(n >= 0); 3633 IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*)); 3634 vec[n] = NULL; 3635 for (i = 0; i < n; i++) { 3636 vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]); 3637 } 
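         /* Rebuild the call from the callee, the return type, and
            the freshly converted argument vector. */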
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}

inline
static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
{
   HWord res;
   /* env :: IRTemp -> IRTemp */
   if (lookupHHW( env, &res, (HWord)tmp ))
      return (IRTemp)res;
   else
      return tmp;
}

inline
static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
                                         HashHW* env )
{
   /* env :: IRTemp -> IRTemp */
   if (tc->tag == TCt) {
      tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   }
}

static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
{
   /* env :: IRTemp -> IRTemp */
   switch (ae->tag) {
      case Ut:
         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
         break;
      case Btt:
         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
         break;
      case Btc:
         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
         break;
      case Bct:
         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
         break;
      case Cf64i:
         break;
      case Ittt:
         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
         break;
      case Ittc:
         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
         break;
      case Itct:
         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
         break;
      case Itcc:
         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
         break;
      case GetIt:
         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
         break;
      case CCall: {
         Int i, n = ae->u.CCall.nArgs;
         for (i = 0; i < n; i++) {
            subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
         }
         break;
      }
      case Load:
         subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
         break;
      default:
         vpanic("subst_AvailExpr");
   }
}

static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            ae
= LibVEX_Alloc_inline(sizeof(AvailExpr)); 3760 ae->tag = Bct; 3761 ae->u.Bct.op = e->Iex.Binop.op; 3762 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3763 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con); 3764 return ae; 3765 } 3766 break; 3767 3768 case Iex_Const: 3769 if (e->Iex.Const.con->tag == Ico_F64i) { 3770 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3771 ae->tag = Cf64i; 3772 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i; 3773 return ae; 3774 } 3775 break; 3776 3777 case Iex_ITE: 3778 if (e->Iex.ITE.cond->tag == Iex_RdTmp) { 3779 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) { 3780 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3781 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3782 ae->tag = Ittt; 3783 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3784 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3785 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3786 return ae; 3787 } 3788 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3789 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3790 ae->tag = Itct; 3791 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3792 ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3793 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3794 return ae; 3795 } 3796 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) { 3797 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3798 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3799 ae->tag = Ittc; 3800 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3801 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3802 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3803 return ae; 3804 } 3805 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3806 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3807 ae->tag = Itcc; 3808 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3809 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3810 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3811 return ae; 3812 } 3813 } 3814 } 3815 break; 3816 3817 case Iex_GetI: 3818 if (e->Iex.GetI.ix->tag == Iex_RdTmp) { 3819 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3820 ae->tag = GetIt; 3821 ae->u.GetIt.descr = e->Iex.GetI.descr; 3822 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp; 3823 ae->u.GetIt.bias = e->Iex.GetI.bias; 3824 return ae; 3825 } 3826 break; 3827 3828 case Iex_CCall: 3829 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3830 ae->tag = CCall; 3831 /* Ok to share only the cee, since it is immutable. */ 3832 ae->u.CCall.cee = e->Iex.CCall.cee; 3833 ae->u.CCall.retty = e->Iex.CCall.retty; 3834 /* irExprVec_to_TmpOrConsts will assert if the args are 3835 neither tmps nor constants, but that's ok .. that's all they 3836 should be. */ 3837 irExprVec_to_TmpOrConsts( 3838 &ae->u.CCall.args, &ae->u.CCall.nArgs, 3839 e->Iex.CCall.args 3840 ); 3841 return ae; 3842 3843 case Iex_Load: 3844 /* If the caller of do_cse_BB has requested that loads also 3845 be CSEd, convert them into AvailExprs. If not, we'll just 3846 return NULL here, and the load never becomes considered 3847 "available", which effectively disables CSEing of them, as 3848 desired. */ 3849 if (allowLoadsToBeCSEd) { 3850 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3851 ae->tag = Load; 3852 ae->u.Load.end = e->Iex.Load.end; 3853 ae->u.Load.ty = e->Iex.Load.ty; 3854 irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr); 3855 return ae; 3856 } 3857 break; 3858 3859 default: 3860 break; 3861 } 3862 3863 return NULL; 3864} 3865 3866 3867/* The BB is modified in-place. Returns True if any changes were 3868 made. The caller can choose whether or not loads should be CSEd. 

/* The BB is modified in-place.  Returns True if any changes were
   made.  The caller can choose whether or not loads should be CSEd.
   In the normal course of things we don't do that, since CSEing loads
   is something of a dodgy proposition if the guest program is doing
   some screwy stuff to do with races and spinloops. */

static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kinds of bindings are the GetI and Load kinds.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            if (ae->tag != GetIt && ae->tag != Load)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (ae->tag == Load) {
                  /* Loads can be invalidated by anything that could
                     possibly touch memory.  But in that case we
                     should have |paranoia| == 2 and we won't get
                     here.  So there's nothing to do; we don't have to
                     invalidate the load. */
               }
               else
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j] = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t -> q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}


/*---------------------------------------------------------------*/
/*--- Add32/Sub32 chain collapsing                            ---*/
/*---------------------------------------------------------------*/

/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */

/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   root node is Add32, not Sub32. */

static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
{
   if (e->tag != Iex_Binop)
      return False;
   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
      return False;
   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
      return False;
   if (e->Iex.Binop.arg2->tag != Iex_Const)
      return False;
   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   if (e->Iex.Binop.op == Iop_Sub32)
      *i32 = -*i32;
   return True;
}
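
/* A worked example of what isAdd32OrSub32 enables (a sketch; invented
   temp numbers, not tool output).  Given

      t3 = Add32(t2,0x4:I32)
      t7 = Sub32(t3,0x9:I32)
      t9 = Add32(t7,0x10:I32)

   collapseChain below establishes t9 = t2 + (4 - 9 + 16), so
   collapse_AddSub_chains_BB can rewrite the last stmt as
   "t9 = Add32(t2,0xB:I32)", shortening the dependency chain. */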

/* Figure out if tmp can be expressed as tmp2 +32 const, for some
   other tmp2.  Scan backwards from the specified start point -- an
   optimisation. */

static Bool collapseChain ( IRSB* bb, Int startHere,
                            IRTemp tmp,
                            IRTemp* tmp2, Int* i32 )
{
   Int     j, ii;
   IRTemp  vv;
   IRStmt* st;
   IRExpr* e;

   /* the (var, con) pair contain the current 'representation' for
      'tmp'.  We start with 'tmp + 0'. */
   IRTemp var = tmp;
   Int    con = 0;

   /* Scan backwards to see if tmp can be replaced by some other tmp
      +/- a constant. */
   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag != Ist_WrTmp)
         continue;
      if (st->Ist.WrTmp.tmp != var)
         continue;
      e = st->Ist.WrTmp.data;
      if (!isAdd32OrSub32(e, &vv, &ii))
         break;
      var = vv;
      con += ii;
   }
   if (j == -1)
      /* no earlier binding for var .. ill-formed IR */
      vpanic("collapseChain");

   /* so, did we find anything interesting? */
   if (var == tmp)
      return False; /* no .. */

   *tmp2 = var;
   *i32  = con;
   return True;
}


/* ------- Main function for Add32/Sub32 chain collapsing ------ */

static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            bb->stmts[i]
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0)
                       ? IRExpr_Binop(Iop_Add32,
                                      IRExpr_RdTmp(var2),
                                      IRExpr_Const(IRConst_U32(con2)))
                       : IRExpr_Binop(Iop_Sub32,
                                      IRExpr_RdTmp(var2),
                                      IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ? */

      IRPutI *puti = st->Ist.PutI.details;
      if (st->tag == Ist_PutI
          && puti->ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
                           &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing2 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         con2 += puti->bias;
         bb->stmts[i]
            = IRStmt_PutI(mkIRPutI(puti->descr,
                                   IRExpr_RdTmp(var2),
                                   con2,
                                   puti->data));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

   } /* for */
}


/*---------------------------------------------------------------*/
/*--- PutI/GetI transformations                               ---*/
/*---------------------------------------------------------------*/

/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   the given starting point to find, if any, a PutI which writes
   exactly the same piece of guest state, and so return the expression
   that the PutI writes.  This is the core of PutI-GetI forwarding. */

static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI reads, or not.  So we have to
               give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for. */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects
            at all, give up.  We could do better -- only give up if
            there are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
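
/* Sketch of the forwarding findPutI makes possible (an invented
   fragment, using an 8-element F64 array purely for illustration; the
   printed IR syntax may differ slightly):

      PUTI(0:8xF64)[t1,0] = t5
      ...
      t9 = GETI(0:8xF64)[t1,0]

   provided nothing in between can write that slice of guest state,
   findPutI returns t5, and do_redundant_GetI_elimination (below)
   replaces the GETI with "t9 = t5". */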

/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   that it writes exactly the same piece of guest state) ?  Safe
   answer: False. */

static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
{
   vassert(pi->tag == Ist_PutI);
   if (s2->tag != Ist_PutI)
      return False;

   IRPutI *p1 = pi->Ist.PutI.details;
   IRPutI *p2 = s2->Ist.PutI.details;

   return toBool(
          getAliasingRelation_II(
             p1->descr, p1->ix, p1->bias,
             p2->descr, p2->ix, p2->bias
          )
          == ExactAlias
          );
}


/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   overlap it?  Safe answer: True.  Note, we could do a lot better
   than this if needed. */

static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
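
/* For instance (a sketch, not derived from real traces): two PutIs
   with the same descriptor, the same index temp and the same bias
   write exactly the same element, so identicalPutIs returns True for
   them.  If the second one postdominates the first with no
   potentially-overlapping access in between, the first is dead --
   which is precisely the situation do_redundant_PutI_elimination
   below looks for. */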

/* ---------- PutI/GetI transformations main functions --------- */

/* Remove redundant GetIs, to the extent that they can be detected.
   bb is modified in-place. */

static
void do_redundant_GetI_elimination ( IRSB* bb )
{
   Int     i;
   IRStmt* st;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
         if (replacement
             && isIRAtom(replacement)
             /* Make sure we're doing a type-safe transformation! */
             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
            if (DEBUG_IROPT) {
               vex_printf("rGI:  ");
               ppIRExpr(st->Ist.WrTmp.data);
               vex_printf(" -> ");
               ppIRExpr(replacement);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
         }
      }
   }

}


/* Remove redundant PutIs, to the extent that they can be detected.
   bb is modified in-place. */

static
void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
{
   Int    i, j;
   Bool   delete;
   IRStmt *st, *stj;

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      if (st->tag != Ist_PutI)
         continue;
      /* Ok, search forwards from here to see if we can find another
         PutI which makes this one redundant, and dodging various
         hazards.  Search forwards:
         * If conditional exit, give up (because anything after that
           does not postdominate this put).
         * If a Get which might overlap, give up (because this PutI
           is not necessarily dead).
         * If a Put which is identical, stop with success.
         * If a Put which might overlap, but is not identical, give up.
         * If a dirty helper call which might write guest state, give up.
         * If a Put which definitely doesn't overlap, or any other
           kind of stmt, continue.
      */
      delete = False;
      for (j = i+1; j < bb->stmts_used; j++) {
         stj = bb->stmts[j];
         if (stj->tag == Ist_NoOp)
            continue;
         if (identicalPutIs(st, stj)) {
            /* success! */
            delete = True;
            break;
         }
         if (stj->tag == Ist_Exit)
            /* give up */
            break;
         if (stj->tag == Ist_Dirty)
            /* give up; could do better here */
            break;
         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
            /* give up */
            break;
      }

      if (delete) {
         if (DEBUG_IROPT) {
            vex_printf("rPI:  ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }

   }
}


/*---------------------------------------------------------------*/
/*--- Loop unrolling                                          ---*/
/*---------------------------------------------------------------*/
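
/* Unrolling works by appending renamed copies of the block to itself,
   so every copy needs its tmps shifted up past those already in use.
   For example (invented numbering): if the block uses t0..t9, the
   second copy's "t3 = Add32(t2,0x1:I32)" becomes
   "t13 = Add32(t12,0x1:I32)" after deltaIRStmt(.., 10).  The two
   helpers below perform exactly that renaming. */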

/* Adjust all tmp values (names) in e by delta.  e is destructively
   modified. */

static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}

/* Adjust all tmp values (names) in st by delta.  st is destructively
   modified. */

static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}

/* If possible, return a loop-unrolled version of bb0.  The original
   is changed.  If not possible, return NULL. */

/* The two schemas considered are:

     X: BODY; goto X

     which unrolls to (eg)  X: BODY;BODY; goto X

   and

     X: BODY; if (c) goto X; goto Y
     which trivially transforms to
     X: BODY; if (!c) goto Y; goto X;
     so it falls in the scope of the first case.

   X and Y must be literal (guest) addresses.
*/

static Int calc_unroll_factor( IRSB* bb )
{
   Int n_stmts, i;

   n_stmts = 0;
   for (i = 0; i < bb->stmts_used; i++) {
      if (bb->stmts[i]->tag != Ist_NoOp)
         n_stmts++;
   }

   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 8* n_stmts);
      return 8;
   }
   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 4* n_stmts);
      return 4;
   }

   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 2* n_stmts);
      return 2;
   }

   if (vex_control.iropt_verbosity > 0)
      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);

   return 1;
}

static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                     ? udst->Iex.Const.con->Ico.U64
                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends with a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms have by now --- */
   /* --- been converted to idiom 1.                    --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach. */
   return flatten_BB(bb1);
}


/*---------------------------------------------------------------*/
/*--- The tree builder                                        ---*/
/*---------------------------------------------------------------*/

/* This isn't part of IR optimisation.  Really it's a pass done prior
   to instruction selection, which improves the code that the
   instruction selector can produce. */

/* --- The 'tmp' environment is the central data structure here --- */

/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  8 gives reasonable performance
   under most circumstances. */
#define A_NENV 10

/* An interval.  Used to record the bytes in the guest state accessed
   by a Put[I] statement or by (one or more) Get[I] expression(s).  In
   case of several Get[I] expressions, the lower/upper bounds are
   recorded.  This is conservative but cheap.
   E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;
      Int  low;
      Int  high;
   }
   Interval;

static inline Bool
intervals_overlap(Interval i1, Interval i2)
{
   return (i1.low >= i2.low && i1.low <= i2.high) ||
          (i2.low >= i1.low && i2.low <= i1.high);
}

static inline void
update_interval(Interval *i, Int low, Int high)
{
   vassert(low <= high);

   if (i->present) {
      if (low  < i->low)  i->low  = low;
      if (high > i->high) i->high = high;
   } else {
      i->present = True;
      i->low  = low;
      i->high = high;
   }
}


/* bindee == NULL   ===  slot is not in use
   bindee != NULL   ===  slot is in use
*/
typedef
   struct {
      IRTemp  binder;
      IRExpr* bindee;
      Bool    doesLoad;
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;

__attribute__((unused))
static void ppAEnv ( ATmpInfo* env )
{
   Int i;
   for (i = 0; i < A_NENV; i++) {
      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
      if (env[i].bindee)
         ppIRExpr(env[i].bindee);
      else
         vex_printf("(null)");
      vex_printf("\n");
   }
}
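
/* Life cycle of an entry in this env, in brief (a sketch of the
   mechanism implemented by ado_treebuild_BB below): a single-use
   "t = E" stmt is deleted from the block and parked here; if the
   single use is reached before anything invalidates E, the binding is
   substituted into the using stmt, building a tree; if a store, an
   overlapping Put or an env overflow intervenes first, the binding is
   flushed back out as an ordinary "t = E" stmt. */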

/* --- Tree-traversal fns --- */

/* Traverse an expr, and detect if any part of it reads memory or does
   a Get.  Be careful ... this really controls how much the
   tree-builder can reorder the code, so getting it right is
   critical. */
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}


/* Add a binding to the front of the env and slide all the rest
   backwards.  It should be the case that the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
{
   Int i;
   vassert(env[A_NENV-1].bindee == NULL);
   for (i = A_NENV-1; i >= 1; i--)
      env[i] = env[i-1];
   env[0].binder   = binder;
   env[0].bindee   = bindee;
   env[0].doesLoad = False; /* filled in later */
   env[0].getInterval.present = False; /* filled in later */
   env[0].getInterval.low  = -1; /* filled in later */
   env[0].getInterval.high = -1; /* filled in later */
}
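
/* The occurrence counts gathered by the two functions following are
   what make the parking scheme safe: only a temp used exactly once
   may be parked in the env, since substituting its definition at the
   single use site duplicates no work.  E.g. (an invented fragment):
   for "t1 = Get(16); t2 = Add32(t1,t1)", uses[t1] is 2, so t1 stays
   as a separate stmt rather than having Get(16) substituted twice. */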

/* Given uses :: array of UShort, indexed by IRTemp.
   Add the use-occurrences of temps in this expression
   to 'uses'.
*/
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int i;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Const:
      case Iex_Get:
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
   }
}


/* Given uses :: array of UShort, indexed by IRTemp.
   Add the use-occurrences of temps in this statement
   to 'uses'.
*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}

/* Look up a binding for tmp in the env.  If found, return the bound
   expression, and set the env's binding to NULL so it is marked as
   used.  If not found, return NULL. */

static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
{
   Int i;
   for (i = 0; i < A_NENV; i++) {
      if (env[i].binder == tmp && env[i].bindee != NULL) {
         IRExpr* bindee = env[i].bindee;
         env[i].bindee = NULL;
         return bindee;
      }
   }
   return NULL;
}

/* Traverse e, looking for temps.  For each observed temp, see if env
   contains a binding for the temp, and if so return the bound value.
   The env has the property that any binding it holds is
   'single-shot', so once a binding is used, it is marked as no longer
   available, by setting its .bindee field to NULL. */

static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
}
static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
}

static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   switch (op) {
      case Iop_Or32:
         /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
         if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
            return IRExpr_Unop( Iop_CmpwNEZ32,
                                IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                        a2->Iex.Unop.arg ) );
         break;

      case Iop_CmpNE32:
         /* Since X has type Ity_I1 we can simplify:
            CmpNE32(1Uto32(X),0)) ==> X */
         if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
            return a1->Iex.Unop.arg;
         break;

      default:
         break;
   }
   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
}

static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
      case Iop_CmpwNEZ64:
         /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
         if (is_Unop(aa, Iop_CmpwNEZ64))
            return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
         /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
         if (is_Binop(aa, Iop_Or64)
             && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
            return fold_IRExpr_Unop(
                      Iop_CmpwNEZ64,
                      IRExpr_Binop(Iop_Or64,
                                   aa->Iex.Binop.arg1->Iex.Unop.arg,
                                   aa->Iex.Binop.arg2));
         /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
         if (is_Binop(aa, Iop_Or64)
             && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
            return fold_IRExpr_Unop(
                      Iop_CmpwNEZ64,
                      IRExpr_Binop(Iop_Or64,
                                   aa->Iex.Binop.arg1,
                                   aa->Iex.Binop.arg2->Iex.Unop.arg));
         break;
      case Iop_CmpNEZ64:
         /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
         if (is_Unop(aa, Iop_Left64))
            return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
         /* CmpNEZ64( 1Uto64(X) ) --> X */
         if (is_Unop(aa, Iop_1Uto64))
            return aa->Iex.Unop.arg;
         break;
      case Iop_CmpwNEZ32:
         /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
         if (is_Unop(aa, Iop_CmpwNEZ32))
            return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
         break;
      case Iop_CmpNEZ32:
         /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
         if (is_Unop(aa, Iop_Left32))
            return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
         /* CmpNEZ32( 1Uto32(X) ) --> X */
         if (is_Unop(aa, Iop_1Uto32))
            return aa->Iex.Unop.arg;
         /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
         if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
            return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
         break;
      case Iop_CmpNEZ8:
         /* CmpNEZ8( 1Uto8(X) ) --> X */
         if (is_Unop(aa, Iop_1Uto8))
            return aa->Iex.Unop.arg;
         break;
      case Iop_Left32:
         /* Left32( Left32(x) ) --> Left32(x) */
         if (is_Unop(aa, Iop_Left32))
            return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
         break;
      case Iop_Left64:
         /* Left64( Left64(x) ) --> Left64(x) */
         if (is_Unop(aa, Iop_Left64))
            return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
         break;
      case Iop_ZeroHI64ofV128:
         /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
         if (is_Unop(aa, Iop_ZeroHI64ofV128))
            return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
         break;
      case Iop_32to1:
         /* 32to1( 1Uto32 ( x ) ) --> x */
         if (is_Unop(aa, Iop_1Uto32))
            return aa->Iex.Unop.arg;
         /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
         if (is_Unop(aa, Iop_CmpwNEZ32))
            return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
         break;
      case Iop_64to1:
         /* 64to1( 1Uto64 ( x ) ) --> x */
         if (is_Unop(aa, Iop_1Uto64))
            return aa->Iex.Unop.arg;
         /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
         if (is_Unop(aa, Iop_CmpwNEZ64))
            return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
         break;
      case Iop_64to32:
         /* 64to32( 32Uto64 ( x )) --> x */
         if (is_Unop(aa, Iop_32Uto64))
            return aa->Iex.Unop.arg;
         /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
         if (is_Unop(aa, Iop_8Uto64))
            return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
         break;

      case Iop_32Uto64:
         /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
         if (is_Unop(aa, Iop_8Uto32))
            return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
         /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
         if (is_Unop(aa, Iop_16Uto32))
            return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
         /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
            --> Shr64( 32Uto64(64to32(x)), sh )) */
         if (is_Unop(aa, Iop_64to32)
             && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
             && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
             && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                        Iop_64to32)) {
            return aa->Iex.Unop.arg;
         }
         /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
            --> 32Uto64(64to32( Shl64( x, sh )) */
         if (is_Unop(aa, Iop_64to32)
             && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
             && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
             && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                        Iop_64to32)) {
            return
               IRExpr_Unop(
                  Iop_32Uto64,
                  IRExpr_Unop(
                     Iop_64to32,
                     IRExpr_Binop(
                        Iop_Shl64,
                        aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                        aa->Iex.Unop.arg->Iex.Binop.arg2
                     )));
         }
         break;

      case Iop_1Sto32:
         /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
         if (is_Unop(aa, Iop_CmpNEZ8)
             && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
             && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
             && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                        Iop_CmpNEZ32)) {
            return IRExpr_Unop( Iop_CmpwNEZ32,
                                aa->Iex.Unop.arg->Iex.Unop.arg
                                  ->Iex.Unop.arg->Iex.Unop.arg);
         }
         break;

      default:
         break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}

static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
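
/* Taken together, substitution plus the fold_IRExpr_* rules mean a
   chain like (a sketch with invented temps)

      t1 = CmpNEZ32(t0)
      t2 = 1Uto32(t1)
      t3 = CmpNE32(t2,0x0:I32)

   where t1 and t2 are each used once, is first collapsed by
   substitution into CmpNE32(1Uto32(CmpNEZ32(t0)),0) and then reduced
   by fold_IRExpr_Binop's CmpNE32 rule to just CmpNEZ32(t0). */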

/* Same deal as atbSubst_Expr, except for stmts. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}

inline
static Bool dirty_helper_stores ( const IRDirty *d )
{
   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
}

inline
static Interval dirty_helper_puts (
                   const IRDirty *d,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Int i;
   Interval interval;

   /* Passing the guest state pointer opens the door to modifying the
      guest state under the covers.  It's not allowed, but let's be
      extra conservative and assume the worst. */
   for (i = 0; d->args[i]; i++) {
      if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
         *requiresPreciseMemExns = True;
         /* Assume all guest state is written. */
         interval.present = True;
         interval.low  = 0;
         interval.high = 0x7FFFFFFF;
         return interval;
      }
   }

   /* Check the side effects on the guest state */
   interval.present = False;
   interval.low = interval.high = -1;
   *requiresPreciseMemExns = False;

   for (i = 0; i < d->nFxState; ++i) {
      if (d->fxState[i].fx != Ifx_Read) {
         Int offset = d->fxState[i].offset;
         Int size = d->fxState[i].size;
         Int nRepeats = d->fxState[i].nRepeats;
         Int repeatLen = d->fxState[i].repeatLen;

         if (preciseMemExnsFn(offset,
                              offset + nRepeats * repeatLen + size - 1,
                              pxControl)) {
            *requiresPreciseMemExns = True;
         }
         update_interval(&interval, offset,
                         offset + nRepeats * repeatLen + size - 1);
      }
   }

   return interval;
}

/* Return an interval if st modifies the guest state.  Via
   requiresPreciseMemExns return whether or not that modification
   requires precise exceptions. */
static Interval stmt_modifies_guest_state (
                   IRSB *bb, const IRStmt *st,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Interval interval;

   switch (st->tag) {
      case Ist_Put: {
         Int offset = st->Ist.Put.offset;
         Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

         *requiresPreciseMemExns
            = preciseMemExnsFn(offset, offset + size - 1, pxControl);
         interval.present = True;
         interval.low  = offset;
         interval.high = offset + size - 1;
         return interval;
      }

      case Ist_PutI: {
         IRRegArray *descr = st->Ist.PutI.details->descr;
         Int offset = descr->base;
         Int size = sizeofIRType(descr->elemTy);

         /* We quietly assume here that all segments are contiguous
            and there are no holes.  This is to avoid a loop.  The
            assumption is conservative in the sense that we might
            report that precise memory exceptions are needed when in
            fact they are not. */
         *requiresPreciseMemExns
            = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
                               pxControl);
         interval.present = True;
         interval.low  = offset;
         interval.high = offset + descr->nElems * size - 1;
         return interval;
      }

      case Ist_Dirty:
         return dirty_helper_puts(st->Ist.Dirty.details,
                                  preciseMemExnsFn, pxControl,
                                  requiresPreciseMemExns);

      default:
         *requiresPreciseMemExns = False;
         interval.present = False;
         interval.low  = -1;
         interval.high = -1;
         return interval;
   }
}
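
/* For example (offsets invented): for a 4-byte Put at guest offset
   72, the function above returns the interval [72,75], with
   *requiresPreciseMemExns answered by preciseMemExnsFn(72,75,..);
   a WrTmp or Store returns an absent interval, since neither touches
   the guest state. */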
Take the 5745 opportunity to also find the maximum guest address in the block, 5746 since that will be needed later for deciding when we can safely 5747 elide event checks. */ 5748 5749 for (i = 0; i < n_tmps; i++) 5750 uses[i] = 0; 5751 5752 for (i = 0; i < bb->stmts_used; i++) { 5753 st = bb->stmts[i]; 5754 switch (st->tag) { 5755 case Ist_NoOp: 5756 continue; 5757 case Ist_IMark: { 5758 UInt len = st->Ist.IMark.len; 5759 Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1; 5760 max_ga_known = True; 5761 if (mga > max_ga) 5762 max_ga = mga; 5763 break; 5764 } 5765 default: 5766 break; 5767 } 5768 aoccCount_Stmt( uses, st ); 5769 } 5770 aoccCount_Expr(uses, bb->next ); 5771 5772# if 0 5773 for (i = 0; i < n_tmps; i++) { 5774 if (uses[i] == 0) 5775 continue; 5776 ppIRTemp( (IRTemp)i ); 5777 vex_printf(" used %d\n", (Int)uses[i] ); 5778 } 5779# endif 5780 5781 /* Phase 2. Scan forwards in bb. For each statement in turn: 5782 5783 If the env is full, emit the end element. This guarantees 5784 there is at least one free slot in the following. 5785 5786 On seeing 't = E', occ(t)==1, 5787 let E'=env(E) 5788 delete this stmt 5789 add t -> E' to the front of the env 5790 Examine E' and set the hints for E' appropriately 5791 (doesLoad? doesGet?) 5792 5793 On seeing any other stmt, 5794 let stmt' = env(stmt) 5795 remove from env any 't=E' binds invalidated by stmt 5796 emit the invalidated stmts 5797 emit stmt' 5798 compact any holes in env 5799 by sliding entries towards the front 5800 5801 Finally, apply env to bb->next. 5802 */ 5803 5804 for (i = 0; i < A_NENV; i++) { 5805 env[i].bindee = NULL; 5806 env[i].binder = IRTemp_INVALID; 5807 } 5808 5809 /* The stmts in bb are being reordered, and we are guaranteed to 5810 end up with no more than the number we started with. Use i to 5811 be the cursor of the current stmt examined and j <= i to be that 5812 for the current stmt being written. 5813 */ 5814 j = 0; 5815 for (i = 0; i < bb->stmts_used; i++) { 5816 5817 st = bb->stmts[i]; 5818 if (st->tag == Ist_NoOp) 5819 continue; 5820 5821 /* Ensure there's at least one space in the env, by emitting 5822 the oldest binding if necessary. */ 5823 if (env[A_NENV-1].bindee != NULL) { 5824 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder, 5825 env[A_NENV-1].bindee ); 5826 j++; 5827 vassert(j <= i); 5828 env[A_NENV-1].bindee = NULL; 5829 } 5830 5831 /* Consider current stmt. */ 5832 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) { 5833 IRExpr *e, *e2; 5834 5835 /* optional extra: dump dead bindings as we find them. 5836 Removes the need for a prior dead-code removal pass. */ 5837 if (uses[st->Ist.WrTmp.tmp] == 0) { 5838 if (0) vex_printf("DEAD binding\n"); 5839 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */ 5840 } 5841 vassert(uses[st->Ist.WrTmp.tmp] == 1); 5842 5843 /* ok, we have 't = E', occ(t)==1. Do the abovementioned 5844 actions. */ 5845 e = st->Ist.WrTmp.data; 5846 e2 = atbSubst_Expr(env, e); 5847 addToEnvFront(env, st->Ist.WrTmp.tmp, e2); 5848 setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2); 5849 /* don't advance j, as we are deleting this stmt and instead 5850 holding it temporarily in the env. */ 5851 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */ 5852 } 5853 5854 /* we get here for any other kind of statement. */ 5855 /* 'use up' any bindings required by the current statement. */ 5856 st2 = atbSubst_Stmt(env, st); 5857 5858 /* Now, before this stmt, dump any bindings in env that it 5859 invalidates. 
   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (over-approximating with True
         is always safe). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* stmtStores is True if this stmt writes memory, or might do so
         (==> we don't want to reorder other loads or stores relative
         to it).  Also, both LL and SC fall under this classification,
         since we really ought to be conservative and not reorder any
         other memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data.  That means, in essence,
                 that a load expression cannot be substituted into a
                 statement that follows the put.  There is nothing
                 actually wrong with doing so, except when the put
                 statement requires precise exceptions.  Think of a load
                 that is moved past a put where the put updates the IP
                 in the guest state.  If the load generates a segfault,
                 the wrong address (line number) would be reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      for (; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}
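
/* A hedged usage sketch for ado_treebuild_BB.  The caller shown here
   is hypothetical -- the real call site is elsewhere in VEX -- but it
   illustrates the ~(Addr)0 "unknown" convention of the return value. */
# if 0
static void example_treebuild_caller ( IRSB* sb,
                                       Bool (*pmeFn)(Int,Int,
                                                     VexRegisterUpdates),
                                       VexRegisterUpdates pxControl )
{
   Addr max_ga = ado_treebuild_BB(sb, pmeFn, pxControl);
   if (max_ga == ~(Addr)0) {
      /* The block contained no IMarks, so the maximum guest address
         is unknown; be conservative about eliding event checks. */
   } else {
      /* max_ga is the highest guest address covered by the block. */
   }
}
# endif
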

/*---------------------------------------------------------------*/
/*--- MSVC specific transformation hacks                      ---*/
/*---------------------------------------------------------------*/

/* The purpose of all this is to find MSVC's idiom for non-constant
   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  The motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   doesn't understand that XORing an undefined bit with itself gives a
   defined result.

   This isn't a problem for the simple case "x ^ x", because iropt
   folds it to a constant zero before Memcheck ever sees it.  But in
   this case we have an intervening "& c" which defeats the simple
   case.  So we have to carefully inspect all expressions rooted at an
   XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   7 other variants resulting from swapping the order of arguments to
   the three binary operations.  If we get a match, we then replace
   the tree with "(a & ~c) | (b & c)", and Memcheck is happy.

   The key difficulty is to spot the two uses of "a".  To normalise
   the IR to maximise the chances of success, we first do a CSE pass,
   with CSEing of loads enabled, since the two "a" expressions may be
   loads, which need to be commoned up.  Then we do a constant folding
   pass, so as to remove any tmp-to-tmp assignment chains that would
   make matching the original expression more difficult.
*/


/* Helper function for debug printing */
__attribute__((unused))
static void print_flat_expr ( IRExpr** env, IRExpr* e )
{
   if (e == NULL) {
      vex_printf("?");
      return;
   }
   switch (e->tag) {
      case Iex_Binop: {
         ppIROp(e->Iex.Binop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Binop.arg1);
         vex_printf(",");
         print_flat_expr(env, e->Iex.Binop.arg2);
         vex_printf(")");
         break;
      }
      case Iex_Unop: {
         ppIROp(e->Iex.Unop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Unop.arg);
         vex_printf(")");
         break;
      }
      case Iex_RdTmp:
         ppIRTemp(e->Iex.RdTmp.tmp);
         vex_printf("=");
         print_flat_expr(env, chase1(env, e));
         break;
      case Iex_Const:
      case Iex_CCall:
      case Iex_Load:
      case Iex_ITE:
      case Iex_Get:
         ppIRExpr(e);
         break;
      default:
         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
         vassert(0);
   }
}
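
/* Before trusting the rewrite, it is worth convincing oneself that
   the two idioms really are equal: for each bit, if c's bit is 1 the
   result is b's bit, and if it is 0 the result is a's bit, under both
   formulations.  The following throwaway program (not part of the
   build, hence disabled) exhaustively checks the identity for all
   8-bit a, b and c. */
# if 0
#include <stdio.h>
int main ( void )
{
   unsigned a, b, c;
   for (a = 0; a < 256; a++)
      for (b = 0; b < 256; b++)
         for (c = 0; c < 256; c++) {
            unsigned msvc_idiom = (a ^ ((a ^ b) & c)) & 0xFF;
            unsigned gcc_idiom  = ((a & ~c) | (b & c)) & 0xFF;
            if (msvc_idiom != gcc_idiom) {
               printf("mismatch: a=%u b=%u c=%u\n", a, b, c);
               return 1;
            }
         }
   printf("a ^ ((a ^ b) & c) == (a & ~c) | (b & c) for all inputs\n");
   return 0;
}
# endif
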
/* Spot a ^ ((a ^ b) & c) for a, b and c tmp-or-const (atoms)
   or any of the other 7 variants generated by switching the order
   of arguments to the outer ^, the inner ^ and the &.
*/
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                              && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                        -----and------ */
   /*                          --xor---     */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /*                   -----and------      */
   /*                     --xor---          */
   /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                        -----and------ */
   /*                             --xor---  */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      vassert(5-5); // ATC
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      vassert(6-6); // ATC
      return 6;
   }

  v78:
   /*                   -----and------      */
   /*                        --xor---       */
   /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

  fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}
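
/* A schematic example of the matching (temp numbering made up).  With
   flat bindings

      t1 = Xor32(t0,t9)
      t2 = And32(t1,t7)
      t3 = Xor32(t0,t2)

   a call spotBitfieldAssignment(&aa, &bb, &cc, env, env[t3],
   Iop_And32, Iop_Xor32) chases t2 and t1 through env, recognises
   variant 1 with aa = t0, bb = t9 and cc = t7, and returns 1. */
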
/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   variants thereof generated by switching arguments around), return
   the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   const HChar* tyNm = NULL;
   IROp opOR  = Iop_INVALID;
   IROp opAND = Iop_INVALID;
   IROp opNOT = Iop_INVALID;
   IROp opXOR = Iop_INVALID;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         tyNm  = "I32";
         opOR  = Iop_Or32;  opAND = Iop_And32;
         opNOT = Iop_Not32; opXOR = Iop_Xor32;
         break;
      case Iop_Xor16:
         tyNm  = "I16";
         opOR  = Iop_Or16;  opAND = Iop_And16;
         opNOT = Iop_Not16; opXOR = Iop_Xor16;
         break;
      case Iop_Xor8:
         tyNm  = "I8";
         opOR  = Iop_Or8;   opAND = Iop_And8;
         opNOT = Iop_Not8;  opXOR = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr* aa = NULL;
   IRExpr* bb = NULL;
   IRExpr* cc = NULL;
   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   if (variant > 0) {
      static UInt ctr = 0;
      if (0)
         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                    "type %s, variant %u\n",
                    ++ctr, tyNm, variant);
      /* It's vitally important that the returned aa, bb and cc are
         atoms -- either constants or tmps.  If it's anything else
         (eg, a GET) then incorporating them in a tree at this point
         in the SB may erroneously pull them forwards (eg, past a PUT
         that originally came after the GET) and so transform the IR
         wrongly.  spotBitfieldAssignment should guarantee only to
         give us atoms, but we check here anyway. */
      vassert(aa && isIRAtom(aa));
      vassert(bb && isIRAtom(bb));
      vassert(cc && isIRAtom(cc));
      return IRExpr_Binop(
                opOR,
                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
                IRExpr_Binop(opAND, bb, cc)
             );
   }
   return NULL;
}
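
/* Continuing the schematic example above: with aa = t0, bb = t9 and
   cc = t7, the (non-flat) replacement tree returned is

      Or32( And32(t0,Not32(t7)), And32(t9,t7) )

   The caller patches this into the enclosing WrTmp, and flatten_BB
   later re-flattens it into individual bindings. */
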

/* SB is modified in-place.  Visit all the IRExprs and, for those
   which are allowed to be non-atomic, perform the XOR transform if
   possible.  This makes |sb| be non-flat, but that's ok, the caller
   can re-flatten it.  Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
{
   Int  i;
   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int      n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
         continue;
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t >= 0 && t < n_tmps);
      env[t] = st->Ist.WrTmp.data;
   }

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI: {
            IRPutI* puti = st->Ist.PutI.details;
            vassert(isIRAtom(puti->ix));
            vassert(isIRAtom(puti->data));
            break;
         }
         case Ist_WrTmp: {
            /* This is the one place where an expr (st->Ist.WrTmp.data)
               is allowed to be more than just a constant or a tmp. */
            IRExpr* mb_new_data
               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            if (mb_new_data) {
               //ppIRSB(sb);
               st->Ist.WrTmp.data = mb_new_data;
               //ppIRSB(sb);
               changed = True;
            }
            break;
         }
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               vassert(isIRAtom(d->mAddr));
            }
            vassert(isIRAtom(d->guard));
            for (Int j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
                  vassert(isIRAtom(arg));
               }
            }
            break;
         }
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
            vex_printf("\n"); ppIRStmt(st);
            vpanic("do_XOR_TRANSFORM_IRSB");
      }
   }

   vassert(isIRAtom(sb->next));
   return changed;
}


static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   // Normalise as much as we can.  This is the one-and-only place
   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (any_cse_changes) {
      // CSEing might have created dead code.  Remove it.
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   // Visit all atoms, do the transformation proper.  |sb| is modified
   // in-place.
   Bool changed = do_XOR_TRANSFORM_IRSB(sb);

   if (changed) {
      // The transformation generates non-flat expressions, so we now
      // need to re-flatten the block.
      sb = flatten_BB(sb);
   }

   return sb;
}

/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */


/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again).
*/


static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl
      )
{
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   }
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
      ppIRSB(bb);
   }

   return bb;
}


/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI. */

static
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
{
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   }
   do_deadcode_BB( bb );
   return bb;
}
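
/* Illustrative effect of collapse_AddSub_chains_BB (schematic; the
   temp numbering is made up).  A chain such as

      t1 = Add32(t0,0x4:I32)
      t2 = Sub32(t1,0x1:I32)

   is folded to the equivalent

      t2 = Add32(t0,0x3:I32)

   which, among other things, makes index expressions feeding GetI and
   PutI easier for the redundancy passes that follow to see through. */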

/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}


/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/
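
/* (For instance, the XOR transform above obeys this rule:
   do_XOR_TRANSFORMS_IRExpr builds a completely new tree with
   IRExpr_Binop/IRExpr_Unop and hands back the new root, rather than
   editing the matched nodes in place.) */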

IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If there is any evidence of FP or Vector activity, CSE, as
            that tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI, since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}


/*---------------------------------------------------------------*/
/*--- end                                           ir_opt.c ---*/
/*---------------------------------------------------------------*/