GVN.cpp revision d32c4a1756b3c236b7f96cf11ba61e78ceb862bc
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
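///
/// For illustration (editorial note, not part of the original source): given
/// IR such as
///
///     %a = add i32 %x, %y
///     %b = add i32 %x, %y
///
/// both adds map to the Expression {opcode = ADD, type = i32,
/// varargs = [VN(%x), VN(%y)]}, so lookup_or_add assigns %a and %b the same
/// value number, which is what marks %b as redundant with %a.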
namespace {
  struct Expression {
    enum ExpressionOpcode {
      ADD = Instruction::Add,
      FADD = Instruction::FAdd,
      SUB = Instruction::Sub,
      FSUB = Instruction::FSub,
      MUL = Instruction::Mul,
      FMUL = Instruction::FMul,
      UDIV = Instruction::UDiv,
      SDIV = Instruction::SDiv,
      FDIV = Instruction::FDiv,
      UREM = Instruction::URem,
      SREM = Instruction::SRem,
      FREM = Instruction::FRem,
      SHL = Instruction::Shl,
      LSHR = Instruction::LShr,
      ASHR = Instruction::AShr,
      AND = Instruction::And,
      OR = Instruction::Or,
      XOR = Instruction::Xor,
      TRUNC = Instruction::Trunc,
      ZEXT = Instruction::ZExt,
      SEXT = Instruction::SExt,
      FPTOUI = Instruction::FPToUI,
      FPTOSI = Instruction::FPToSI,
      UITOFP = Instruction::UIToFP,
      SITOFP = Instruction::SIToFP,
      FPTRUNC = Instruction::FPTrunc,
      FPEXT = Instruction::FPExt,
      PTRTOINT = Instruction::PtrToInt,
      INTTOPTR = Instruction::IntToPtr,
      BITCAST = Instruction::BitCast,
      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    /*bool operator!=(const Expression &other) const {
      return !(*this == other);
    }*/
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(ExtractValueInst* C);
      Expression create_expression(InsertValueInst* C);

      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

template <>
struct isPodLike<Expression> { static const bool value = true; };

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  CallSite CS(C);
  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;
  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
  e.function = 0;
  e.type = BO->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getCondition()));
  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(ExtractValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACTVALUE;

  return e;
}

Expression ValueTable::create_expression(InsertValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::INSERTVALUE;

  return e;
}

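// For illustration (editorial note, not part of the original source): a GEP
// such as
//
//     %p = getelementptr %struct.S* %base, i64 0, i32 2
//
// is encoded above as {opcode = GEP, type = <type of %p>,
// varargs = [VN(%base), VN(0), VN(2)]}, so two GEPs with the same base and
// the same indices receive the same value number.
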
//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber!  Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies.  If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
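      // (Editorial note: a candidate is only usable when its block properly
      // dominates the block containing C; otherwise there is no single
      // dominating call to reuse.)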
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or :
    case Instruction::Xor:
      exp = create_expression(cast<BinaryOperator>(I));
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      exp = create_expression(cast<CmpInst>(I));
      break;
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
      exp = create_expression(cast<CastInst>(I));
      break;
    case Instruction::Select:
      exp = create_expression(cast<SelectInst>(I));
      break;
    case Instruction::ExtractElement:
      exp = create_expression(cast<ExtractElementInst>(I));
      break;
    case Instruction::InsertElement:
      exp = create_expression(cast<InsertElementInst>(I));
      break;
    case Instruction::ShuffleVector:
      exp = create_expression(cast<ShuffleVectorInst>(I));
      break;
    case Instruction::ExtractValue:
      exp = create_expression(cast<ExtractValueInst>(I));
      break;
    case Instruction::InsertValue:
      exp = create_expression(cast<InsertValueInst>(I));
      break;
    case Instruction::GetElementPtr:
      exp = create_expression(cast<GetElementPtrInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value.  Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };
}

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData* TD;

    ValueTable VN;
    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better.
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
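//
// Illustrative usage (editorial note, not part of the original source): a
// client would typically schedule the pass via a pass manager, e.g.
//
//   PM.add(createGVNPass());      // full GVN, including load elimination
//   PM.add(createGVNPass(true));  // NoLoads: value numbering only
//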
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully available in FullyAvailableBlocks.
/// Each entry in this map holds one of four states:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them
  // as 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}


/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value.  LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
/// be expressed as a base pointer plus a constant offset.  Return the base and
/// offset to the caller.
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                        const TargetData &TD) {
  Operator *PtrOp = dyn_cast<Operator>(Ptr);
  if (PtrOp == 0) return Ptr;

  // Just look through bitcasts.
  if (PtrOp->getOpcode() == Instruction::BitCast)
    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);

  // If this is a GEP with constant indices, we can look through it.
  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
       ++I, ++GTI) {
    ConstantInt *OpC = cast<ConstantInt>(*I);
    if (OpC->isZero()) continue;

    // Handle struct and array indices, which add their offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
    } else {
      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += OpC->getSExtValue()*Size;
    }
  }

  // Re-sign extend from the pointer size if needed to get overflow edge cases
  // right.
  unsigned PtrSize = TD.getPointerSizeInBits();
  if (PtrSize < 64)
    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);

  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
}


/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
  Value *LoadBase =
    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614.  Then
  // remove this check, as it is duplicated with what we have below.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load and then merge the bits in), but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}

static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
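  // (Editorial example: a constant-length "memset(p, c, 16)" can be analyzed
  // below, while a variable-length "memset(p, c, %n)" is rejected here.)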
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.  Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
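///
/// Illustrative example (editorial note, not part of the original source):
/// for a dependence like "memset(%p, 0xAA, 16)" feeding a 4-byte load that
/// lies within the set region, the single set byte is splatted out to the
/// load width below and the load can be replaced by 0xAAAAAAAA.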
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // See if we can constant fold a load from the constant with the offset
  // applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}

namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple value, possibly accessed at an offset.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type.  This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << "  " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have a value available in ValuesPerBlock, and also keep track of
  // whether we see dependencies that produce an unknown value for the load
  // (such as a call that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs.  Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                      DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger than or equal to the loaded value, we
        // can reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                       S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the
      // load.
      if (LD->getType() != LI->getType()) {
        // If the previously loaded value is larger than or equal to what we
        // need, we can reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one), as in the illustrative sketch below.
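  //
  // For illustration (editorial note, not part of the original source):
  //
  //            B1
  //           /  \
  //         B2    B3
  //   (store %v)   |
  //           \   /
  //            B4   (%x = load i32* %p)
  //
  // The value is available out of B2 (the store) but not out of B3.  Load PRE
  // inserts "%x.pre = load i32* %p" at the end of B3; PHI construction in B4
  // then yields "phi i32 [ %v, %B2 ], [ %x.pre, %B3 ]" and the original load
  // becomes fully redundant and is removed.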

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor.  Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated.  Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions.  We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }
  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary.  The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can be
    // 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case.  Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                  LI->getAlignment(),
                                  UnavailablePred->getTerminator());

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // Find the local dependency of this load: ideally a prior store to, or
  // load from, the same pointer.
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could forward the stored value by recognizing that the clobbering
    // instructions are obviously a common base + constant offset, and that
    // the previous store (or memset) completely covers this load.  This sort
    // of thing can happen in bitfield access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,
                                            *TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // Fast-print the dependency; using operator<< on the instruction
      // would be too slow.
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }
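
  // An illustrative example (not from the original comments) of the type
  // coercion handled by the store and load cases below: a must-aliased
  // store/load pair with mismatched types, e.g.
  //   store i32 %v, i32* %P
  //   %f = load float* %Q     ; %Q must-aliases %P
  // can be resolved by reusing %v as  bitcast i32 %v to float  instead of
  // reloading from memory.
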
  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getValueOperand();

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
    if (StoredVal->getType() != L->getType()) {
      if (TD) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      } else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually
    // have the same type.  See if we know how to reuse the previously loaded
    // value (depending on its type).
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n"
                     << *AvailableVal << "\n" << *L << "\n\n\n");
      } else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading
  // an undef value.  This can happen when loading from a fresh allocation
  // with no intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime begin, then the loaded value
  // is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}

Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  ValueNumberScope *Locals = I->second;
  while (Locals) {
    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
    if (I != Locals->table.end())
      return I->second;
    Locals = Locals->parent;
  }

  return 0;
}
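
// An illustrative use of lookupNumber (not from the original comments): if
// %a in a dominating block and a later instruction %b both map to value
// number N, lookupNumber(parent-of-%b, N) walks the ValueNumberScope chain
// up the dominator tree, finds %a, and lets processInstruction below replace
// %b with %a.
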
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(I);
    toErase.push_back(I);
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      localAvail[TrueSucc]->table[CondVN] =
        ConstantInt::getTrue(TrueSucc->getContext());
    if (FalseSucc->getSinglePredecessor())
      localAvail[FalseSucc]->table[CondVN] =
        ConstantInt::getFalse(FalseSucc->getContext());

    return false;

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    return false;
  }

  if (isa<PHINode>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    // If the number we were assigned was a brand new VN, then we don't
    // need to do a lookup to see if the number already exists
    // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    // Perform fast-path value-number based elimination of values inherited
    // from dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
    // Remove it!
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    if (MD && repl->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
  }

  return false;
}
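
// An illustrative example (not from the original comments) of the
// branch-condition propagation in processInstruction above: given
//   br i1 %cmp, label %T, label %F
// where %T and %F have no other predecessors, %cmp's value number maps to
// 'true' inside %T and 'false' inside %F, so dominated uses of the same
// condition can resolve to a constant.
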
/// runOnFunction - This is the main transformation entry point for a
/// function.
bool GVN::runOnFunction(Function &F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}


bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function
  // (and incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join
/// point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);
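
      // An illustrative diamond (not from the original comments): suppose
      //   Left:  %v = add i32 %a, %b
      //   Right: (no equivalent computation)
      //   Join:  %w = add i32 %a, %b    ; CurInst
      // PRE clones the add into Right and replaces %w at the join with
      //   %w.pre-phi = phi i32 [ %v, %Left ], [ %w.pre, %Right ]
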
      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!localAvail.count(P)) {
          NumWithout = 2;
          break;
        }

        DenseMap<uint32_t, Value*>::iterator predV =
          localAvail[P]->table.find(ValNo);
        if (predV == localAvail[P]->table.end()) {
          PREPred = P;
          ++NumWithout;
        } else if (predV->second == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV->second;
          ++NumWith;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across an indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
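
      // At this point every predecessor of CurrentBlock has an entry in
      // predMap: the predecessors that already had the value were recorded
      // in the scan above, and PREPred was just filled in with the clone,
      // so the phi built below is fully populated.
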
      // Create a PHI to make the value available in this block.
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      localAvail[CurrentBlock]->table[ValNo] = Phi;

      CurInst->replaceAllUsesWith(Phi);
      if (MD && Phi->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    if (DI->getIDom())
      localAvail[DI->getBlock()] =
        new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
    else
      localAvail[DI->getBlock()] = new ValueNumberScope(0);
  }

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}

/// verifyRemoved - Verify that the specified instruction does not occur in
/// our internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
    const ValueNumberScope *VNS = I->second;

    while (VNS) {
      for (DenseMap<uint32_t, Value*>::const_iterator
           II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
        assert(II->second != Inst && "Inst still in value numbering scope!");
      }

      VNS = VNS->parent;
    }
  }
}