GVN.cpp revision bc9a28dd544e37074693333ea96c508a3d3bc3b4
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <cstdio>
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
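// For example, load PRE can be toggled independently of scalar PRE when this
// pass is driven by a tool that parses cl::opt flags (illustrative invocation,
// assuming the standard `opt` driver):
//   opt -gvn -enable-load-pre=false input.bc -o output.bc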
//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
                            UDIV, SDIV, FDIV, UREM, SREM,
                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
                            INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    bool operator!=(const Expression &other) const {
      return !(*this == other);
    }
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression::ExpressionOpcode getOpcode(CastInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(Constant* C);
      Expression create_expression(ExtractValueInst* C);
      Expression create_expression(InsertValueInst* C);

      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      unsigned size();
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}
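// A minimal usage sketch of the table above (illustrative only; InstA and
// InstB are hypothetical Value* names, and this snippet is not part of the
// pass):
//   ValueTable VT;
//   uint32_t N1 = VT.lookup_or_add(InstA);
//   uint32_t N2 = VT.lookup_or_add(InstB);
//   if (N1 == N2)
//     ; // InstA and InstB are expression-wise equivalent.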
namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}
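// Note that EMPTY and TOMBSTONE exist purely as DenseMap sentinel keys: empty
// buckets hold getEmptyKey() and erased buckets hold getTombstoneKey(), so no
// real expression may ever use those opcodes. Per operator== above, the two
// sentinels compare on opcode alone (sketch of the map's view, illustrative):
//   DenseMap<Expression, uint32_t> M;  // unused buckets == getEmptyKey()
//   M.erase(SomeExpr);                 // bucket becomes getTombstoneKey()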
//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
  switch(BO->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    llvm_unreachable("Binary operator with unknown opcode?");
  case Instruction::Add:  return Expression::ADD;
  case Instruction::FAdd: return Expression::FADD;
  case Instruction::Sub:  return Expression::SUB;
  case Instruction::FSub: return Expression::FSUB;
  case Instruction::Mul:  return Expression::MUL;
  case Instruction::FMul: return Expression::FMUL;
  case Instruction::UDiv: return Expression::UDIV;
  case Instruction::SDiv: return Expression::SDIV;
  case Instruction::FDiv: return Expression::FDIV;
  case Instruction::URem: return Expression::UREM;
  case Instruction::SRem: return Expression::SREM;
  case Instruction::FRem: return Expression::FREM;
  case Instruction::Shl:  return Expression::SHL;
  case Instruction::LShr: return Expression::LSHR;
  case Instruction::AShr: return Expression::ASHR;
  case Instruction::And:  return Expression::AND;
  case Instruction::Or:   return Expression::OR;
  case Instruction::Xor:  return Expression::XOR;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
  switch(C->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    llvm_unreachable("Cast operator with unknown opcode?");
  case Instruction::Trunc:    return Expression::TRUNC;
  case Instruction::ZExt:     return Expression::ZEXT;
  case Instruction::SExt:     return Expression::SEXT;
  case Instruction::FPToUI:   return Expression::FPTOUI;
  case Instruction::FPToSI:   return Expression::FPTOSI;
  case Instruction::UIToFP:   return Expression::UITOFP;
  case Instruction::SIToFP:   return Expression::SITOFP;
  case Instruction::FPTrunc:  return Expression::FPTRUNC;
  case Instruction::FPExt:    return Expression::FPEXT;
  case Instruction::PtrToInt: return Expression::PTRTOINT;
  case Instruction::IntToPtr: return Expression::INTTOPTR;
  case Instruction::BitCast:  return Expression::BITCAST;
  }
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;
  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
  e.function = 0;
  e.type = BO->getType();
  e.opcode = getOpcode(BO);

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getCondition()));
  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}
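// For instance, given the IR below (hypothetical values %a/%b, illustration
// only), both adds canonicalize to the same Expression
// {opcode=ADD, type=i32, varargs=[vn(%a), vn(%b)]} and therefore receive the
// same value number:
//   %x = add i32 %a, %b
//   %y = add i32 %a, %b   ; same expression, same value number as %x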
Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(ExtractValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACTVALUE;

  return e;
}

Expression ValueTable::create_expression(InsertValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::INSERTVALUE;

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumOperands() != C->getNumOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
        uint32_t c_vn = lookup_or_add(C->getOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber! Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->second.isNonLocal())
        continue;

      // We don't handle non-dependencies. If we already have a call, reject
      // instruction dependencies.
      if (I->second.isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumOperands() != C->getNumOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
      uint32_t c_vn = lookup_or_add(C->getOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      exp = create_expression(cast<BinaryOperator>(I));
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      exp = create_expression(cast<CmpInst>(I));
      break;
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
      exp = create_expression(cast<CastInst>(I));
      break;
    case Instruction::Select:
      exp = create_expression(cast<SelectInst>(I));
      break;
    case Instruction::ExtractElement:
      exp = create_expression(cast<ExtractElementInst>(I));
      break;
    case Instruction::InsertElement:
      exp = create_expression(cast<InsertElementInst>(I));
      break;
    case Instruction::ShuffleVector:
      exp = create_expression(cast<ShuffleVectorInst>(I));
      break;
    case Instruction::ExtractValue:
      exp = create_expression(cast<ExtractValueInst>(I));
      break;
    case Instruction::InsertValue:
      exp = create_expression(cast<InsertValueInst>(I));
      break;
    case Instruction::GetElementPtr:
      exp = create_expression(cast<GetElementPtrInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };
}

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool nopre = false, bool noloads = false)
      : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }

  private:
    bool NoPRE;
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    ValueTable VN;
    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;

    // This transformation requires dominator and postdominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value *CollapsePhi(PHINode* p);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
  };

  char GVN::ID = 0;
}
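// A sketch of how a client would schedule this pass (illustrative; assumes a
// caller-owned PassManager named PM):
//   PM.add(createGVNPass());       // full GVN: PRE and load PRE enabled
//   PM.add(createGVNPass(true));   // GVN with PRE disabled (NoPRE = true)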
// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
  return new GVN(NoPRE, NoLoads);
}

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  printf("{\n");
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    printf("%d\n", I->first);
    I->second->dump();
  }
  printf("}\n");
}

static bool isSafeReplacement(PHINode* p, Instruction *inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

Value *GVN::CollapsePhi(PHINode *PN) {
  Value *ConstVal = PN->hasConstantValue(DT);
  if (!ConstVal) return 0;

  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
  if (!Inst)
    return ConstVal;

  if (DT->dominates(Inst, PN))
    if (isSafeReplacement(PN, Inst))
      return Inst;
  return 0;
}
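// For example, a phi whose incoming values are all the same dominating
// definition collapses to that definition (hypothetical IR, illustration
// only):
//   %p = phi i32 [ %x, %bb1 ], [ %x, %bb2 ]   ; collapses to %x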
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a four-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either. Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect. Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  while (!BBWorklist.empty()) {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks. This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  }

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
      isa<StructType>(StoredVal->getType()) ||
      isa<ArrayType>(StoredVal->getType()))
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}
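// As an example of the rule above (type sizes assume a typical 64-bit
// TargetData, illustration only): an i64 store can feed an i32 load because
// the store is at least as wide, but an i32 store can never feed an i64 load,
// and no {i32, i32} aggregate qualifies in either position.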
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (isa<PointerType>(StoredValTy)) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (isa<PointerType>(TypeToCastTo))
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (isa<PointerType>(LoadedTy))
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (isa<PointerType>(StoredValTy)) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!isa<IntegerType>(StoredValTy)) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (isa<PointerType>(LoadedTy))
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
/// be expressed as a base pointer plus a constant offset. Return the base and
/// offset to the caller.
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                        const TargetData &TD) {
  Operator *PtrOp = dyn_cast<Operator>(Ptr);
  if (PtrOp == 0) return Ptr;

  // Just look through bitcasts.
  if (PtrOp->getOpcode() == Instruction::BitCast)
    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);

  // If this is a GEP with constant indices, we can look through it.
  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
       ++I, ++GTI) {
    ConstantInt *OpC = cast<ConstantInt>(*I);
    if (OpC->isZero()) continue;

    // Handle struct and array indices, which add their offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
    } else {
      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += OpC->getSExtValue()*Size;
    }
  }

  // Re-sign extend from the pointer size if needed to get overflow edge cases
  // right.
  unsigned PtrSize = TD.getPointerSizeInBits();
  if (PtrSize < 64)
    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);

  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
}
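// For instance (field offsets assume 4-byte i32 fields with no padding,
// purely for illustration), decomposing the pointer below yields base %s and
// Offset 4:
//   %f = getelementptr { i32, i32 }* %s, i32 0, i32 1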
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
  Value *LoadBase =
    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens.
  if (LoadOffset == StoreOffset) {
#if 0
    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *L->getPointerOperand() << "\n"
           << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
    errs() << "'" << L->getParent()->getParent()->getName() << "'"
           << *L->getParent();
#endif
    return -1;
  }

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
  // remove this check, as it is duplicated with what we have below.
  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset) {
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  } else {
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
  }
  if (isAAFailure) {
#if 0
    errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *L->getPointerOperand() << "\n"
           << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
    errs() << "'" << L->getParent()->getParent()->getName() << "'"
           << *L->getParent();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}
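// Worked example of the computation above (byte offsets are hypothetical):
// an 8-byte store at offset 0 fully covers a 4-byte load at offset 4 from the
// same base, so the analysis returns 4. If the load instead began at offset 6
// it would extend past the stored bits (6+4 > 0+8) and the analysis would
// return -1.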
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
      isa<ArrayType>(DepSI->getOperand(0)->getType()))
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize = TD.getTypeSizeInBits(StorePtr->getType());
  return AnalyzeLoadFromClobberingWrite(L, StorePtr, StoreSize, TD);
}

static int AnalyzeLoadFromClobberingMemInst(LoadInst *L, MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(L, MI->getDest(), MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset =
    AnalyzeLoadFromClobberingWrite(L, MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(L->getType()));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}
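// Example of the memcpy case above (hypothetical IR; names @tbl, %p, %q are
// for illustration only): an i32 load at byte offset 8 into memory that was
// filled by a memcpy from a constant global folds to the constant at byte
// offset 8 of the global's initializer:
//   call void @llvm.memcpy.i64(i8* %p, i8* bitcast ([4 x i32]* @tbl to i8*),
//                              i64 16, i32 4)
//   %v = load i32* %q        ; %q == %p+8  ->  element 2 of @tbl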
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;


  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (isa<PointerType>(SrcVal->getType()))
    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
  if (!isa<IntegerType>(SrcVal->getType()))
    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                             "tmp", InsertPt);

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = BinaryOperator::CreateLShr(SrcVal,
                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);

  if (LoadSize != StoreSize)
    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                           "tmp", InsertPt);

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}
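// The splat loop above doubles the populated byte count each round, so it
// takes O(log LoadSize) shift/or pairs. For example (byte value hypothetical),
// forwarding an i32 load out of memset(p, 0xAB, n) materializes:
//   0x000000AB -> 0x0000ABAB -> 0xABABABAB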
struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }
};

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     AliasAnalysis *AA) {
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI);

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    unsigned Offset = AV.Offset;

    Value *AvailableVal;
    if (AV.isSimpleValue()) {
      AvailableVal = AV.getSimpleValue();
      if (AvailableVal->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
                                            BB->getTerminator(), *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
              << *AV.getSimpleValue() << '\n'
              << *AvailableVal << '\n' << "\n\n\n");
      }
    } else {
      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
                                            LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
            << "  " << *AV.getMemIntrinValue() << '\n'
            << *AvailableVal << '\n' << "\n\n\n");
    }
    SSAUpdate.AddAvailableValue(BB, AvailableVal);
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (isa<PointerType>(V->getType()))
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}
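// Conceptually, when the load's block joins predecessors that each have the
// value available, the SSAUpdater materializes a phi for it (hypothetical IR,
// illustration only):
//   bb3:
//     %merged = phi i32 [ %stored.val, %bb1 ], [ %other.load, %bb2 ]
//     ; ...and the load in bb3 is then replaced by %merged.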
static bool isLifetimeStart(Instruction *Inst) {
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                   Deps);
  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
    DEBUG(
      errs() << "GVN: non-local load ";
      WriteAsOperand(errs(), LI);
      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  const TargetData *TD = 0;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].first;
    MemDepResult DepInfo = Deps[i].second;

    if (DepInfo.isClobber()) {
      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from the
      // stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD) {
          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                           DepSI->getOperand(0),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getOperand(0)->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                          S->getOperand(0)));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (isa<PointerType>(V->getType()))
      MD->invalidateCachedPointerInfo(V);
    toErase.push_back(LI);
    NumGVNLoad++;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size. As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (!TmpBB) // If haven't found any, bail now.
      return false;
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      allSingleSucc = false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // If we have a repl set with LI itself in it, this means we have a loop where
  // at least one of the values is LI. Since this means that we won't be able
  // to eliminate LI even if we insert uses in the other predecessors, we will
  // end up increasing code size. Reject this by scanning for LI.
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    if (ValuesPerBlock[i].isSimpleValue() &&
        ValuesPerBlock[i].getSimpleValue() == LI)
      return false;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Okay, we have some hope :). Check to see if the loaded value is fully
  // available in all but one predecessor.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  BasicBlock *UnavailablePred = 0;

  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      continue;

    // If this load is not available in multiple predecessors, reject it.
    if (UnavailablePred && UnavailablePred != *PI)
      return false;
    UnavailablePred = *PI;
  }

  assert(UnavailablePred != 0 &&
         "Fully available value should be eliminated above!");

  // We don't currently handle critical edges :(
  if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
                 << UnavailablePred->getName() << "': " << *LI << '\n');
    return false;
  }

  // Do PHI translation to get its value in the predecessor if necessary. The
  // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
  //
  // FIXME: This may insert a computation, but we don't tell scalar GVN
  // optimization stuff about it. How do we do this?
  SmallVector<Instruction*, 8> NewInsts;
  Value *LoadPtr = 0;

  // If all preds have a single successor, then we know it is safe to insert the
  // load on the pred (?!?), so we can insert code to materialize the pointer if
  // it is not available.
  if (allSingleSucc) {
    LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
                                             UnavailablePred, TD, *DT,NewInsts);
  } else {
    LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
                                                 UnavailablePred, TD, *DT);
  }

  // Assign value numbers to these new instructions.

  // Assign value numbers to these new instructions.
  for (SmallVector<Instruction*, 8>::iterator NI = NewInsts.begin(),
       NE = NewInsts.end(); NI != NE; ++NI) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(*NI);
  }

  // If we couldn't find or insert a computation of this phi-translated value,
  // we fail PRE.
  if (LoadPtr == 0) {
    DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
                 << *LI->getOperand(0) << "\n");
    return false;
  }

  // Make sure it is valid to move this load here.  We have to watch out for:
  //  @1 = getelementptr (i8* p, ...
  //  test p and branch if == 0
  //  load @1
  // It is valid to have the getelementptr before the test, even if p can be
  // 0, as getelementptr only does address arithmetic.
  // If we are not pushing the value through any multiple-successor blocks
  // we do not have this case.  Otherwise, check that the load is safe to
  // put anywhere; this can be improved, but should be conservatively safe.
  if (!allSingleSucc &&
      // FIXME: REEVALUATE THIS.
      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) {
    assert(NewInsts.empty() && "Should not have inserted instructions");
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors.
  // Do it.
  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          errs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                LI->getAlignment(),
                                UnavailablePred->getTerminator());

  // Add the newly created load.
  ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                      NewLoad));

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (isa<PointerType>(V->getType()))
    MD->invalidateCachedPointerInfo(V);
  toErase.push_back(LI);
  NumPRELoad++;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are
    // obviously a common base + constant offset, and if the previous store
    // (or memset) completely covers this load.  This sort of thing can happen
    // in bitfield access code.
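
    // In the example above (illustrative, little-endian assumed), the i8 load
    // reads byte 1 of the stored i32, so AnalyzeLoadFromClobberingStore would
    // return Offset == 1 and GetStoreValueForLoad would materialize roughly
    //   %tmp = lshr i32 123, 8
    //   %C   = trunc i32 %tmp to i8
    // in place of the load.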
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(),
                                            L, *TD);
      }
    }

    if (AvailVal) {
      DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (isa<PointerType>(AvailVal->getType()))
        MD->invalidateCachedPointerInfo(AvailVal);
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }

    DEBUG(
      // Fast-print the dep; using operator<< on the instruction would be too
      // slow.
      errs() << "GVN: load ";
      WriteAsOperand(errs(), L);
      Instruction *I = Dep.getInst();
      errs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getOperand(0);

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
    const TargetData *TD = 0;
    if (StoredVal->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      } else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (isa<PointerType>(StoredVal->getType()))
      MD->invalidateCachedPointerInfo(StoredVal);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }
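
  // For illustration (hypothetical IR): "store float %f, float* %P" followed
  // by "%x = load i32* %Q", where %P must-aliases %Q, is handled above by
  // having CoerceAvailableValueToLoadType insert something like
  //   %x = bitcast float %f to i32
  // so the load can be deleted.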

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    const TargetData *TD = 0;
    if (DepLI->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n"
                     << *AvailableVal << "\n" << *L << "\n\n\n");
      } else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (isa<PointerType>(AvailableVal->getType()))
      MD->invalidateCachedPointerInfo(AvailableVal);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading
  // an undef value.  This can happen when loading from a fresh allocation
  // with no intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  // If this load occurs right after a lifetime begin, the loaded value is
  // undefined.
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }
  }

  return false;
}

Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  ValueNumberScope *Locals = I->second;
  while (Locals) {
    DenseMap<uint32_t, Value*>::iterator FI = Locals->table.find(num);
    if (FI != Locals->table.end())
      return FI->second;
    Locals = Locals->parent;
  }

  return 0;
}


/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      localAvail[TrueSucc]->table[CondVN] =
        ConstantInt::getTrue(TrueSucc->getContext());
    if (FalseSucc->getSinglePredecessor())
      localAvail[FalseSucc]->table[CondVN] =
        ConstantInt::getFalse(FalseSucc->getContext());

    return false;
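
    // For illustration (hypothetical IR): after "br i1 %cmp, label %T,
    // label %F", the two table entries above make anything that gets %cmp's
    // value number resolve to true inside %T and false inside %F (only for
    // single-predecessor successors), so redundant tests of the same
    // condition fold away.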

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    return false;
  }

  // Collapse PHI nodes.
  if (PHINode* p = dyn_cast<PHINode>(I)) {
    Value *constVal = CollapsePhi(p);

    if (constVal) {
      p->replaceAllUsesWith(constVal);
      if (MD && isa<PointerType>(constVal->getType()))
        MD->invalidateCachedPointerInfo(constVal);
      VN.erase(p);

      toErase.push_back(p);
    } else {
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

  // Perform fast-path value-number-based elimination of values inherited from
  // dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
    // Remove it!
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    if (MD && isa<PointerType>(repl->getType()))
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
  }

  return false;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) NumGVNBlocks++;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;
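
  // Iterate to a fixed point: one round of elimination can expose further
  // redundancies.  For example (illustrative), once two address computations
  // are merged under one value number, loads through them may only become
  // provably redundant on the following iteration.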
  while (ShouldContinue) {
    DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}


bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function
  // (and incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(errs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join
/// point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (*PI == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!localAvail.count(*PI)) {
          NumWithout = 2;
          break;
        }

        DenseMap<uint32_t, Value*>::iterator predV =
          localAvail[*PI]->table.find(ValNo);
        if (predV == localAvail[*PI]->table.end()) {
          PREPred = *PI;
          NumWithout++;
        } else if (predV->second == CurInst) {
          NumWithout = 2;
        } else {
          predMap[*PI] = predV->second;
          NumWith++;
        }
      }
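
      // For illustration (hypothetical CFG): in a diamond
      //   entry -> then, else;   then -> merge;   else -> merge
      // where "%a = add i32 %x, %y" is computed in %then and again in %merge,
      // the scan above ends with NumWith == 1 and NumWithout == 1, which is
      // exactly the shape handled below.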

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across an indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = 0;
      for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
           i != e; ++i)
        if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
          SuccNum = i;
          break;
        }

      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      NumGVNPRE++;

      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));

      // Create a PHI to make the value available in this block.
      PHINode* Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI)
        Phi->addIncoming(predMap[*PI], *PI);

      VN.add(Phi, ValNo);
      localAvail[CurrentBlock]->table[ValNo] = Phi;

      CurInst->replaceAllUsesWith(Phi);
      if (MD && isa<PointerType>(Phi->getType()))
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);

      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
       I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
    SplitCriticalEdge(I->first, I->second, this);

  return Changed || !toSplit.empty();
}
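
// For illustration (hypothetical IR), a successful performPRE rewrites
//   then:   %a.then = add i32 %x, %y
//           br label %merge
//   else:   br label %merge
//   merge:  %a = add i32 %x, %y
// into roughly
//   else:   %a.pre = add i32 %x, %y
//           br label %merge
//   merge:  %a.pre-phi = phi i32 [ %a.then, %then ], [ %a.pre, %else ]
// with uses of %a rewritten to use the PHI.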

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    if (DI->getIDom())
      localAvail[DI->getBlock()] =
        new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
    else
      localAvail[DI->getBlock()] = new ValueNumberScope(0);
  }

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}

/// verifyRemoved - Verify that the specified instruction does not occur in
/// our internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
    const ValueNumberScope *VNS = I->second;

    while (VNS) {
      for (DenseMap<uint32_t, Value*>::const_iterator
           II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
        assert(II->second != Inst && "Inst still in value numbering scope!");
      }

      VNS = VNS->parent;
    }
  }
}