GVN.cpp revision 3f329cb781492e19a72af267cd3b7c2d8307a818
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global value numbering to eliminate fully redundant 11// instructions. It also performs simple dead load elimination. 12// 13// Note that this pass does the value numbering itself; it does not use the 14// ValueNumbering analysis passes. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "gvn" 19#include "llvm/Transforms/Scalar.h" 20#include "llvm/GlobalVariable.h" 21#include "llvm/IntrinsicInst.h" 22#include "llvm/LLVMContext.h" 23#include "llvm/Analysis/AliasAnalysis.h" 24#include "llvm/Analysis/ConstantFolding.h" 25#include "llvm/Analysis/Dominators.h" 26#include "llvm/Analysis/InstructionSimplify.h" 27#include "llvm/Analysis/Loads.h" 28#include "llvm/Analysis/MemoryBuiltins.h" 29#include "llvm/Analysis/MemoryDependenceAnalysis.h" 30#include "llvm/Analysis/PHITransAddr.h" 31#include "llvm/Analysis/ValueTracking.h" 32#include "llvm/Assembly/Writer.h" 33#include "llvm/Target/TargetData.h" 34#include "llvm/Transforms/Utils/BasicBlockUtils.h" 35#include "llvm/Transforms/Utils/SSAUpdater.h" 36#include "llvm/ADT/DenseMap.h" 37#include "llvm/ADT/DepthFirstIterator.h" 38#include "llvm/ADT/SmallPtrSet.h" 39#include "llvm/ADT/Statistic.h" 40#include "llvm/Support/Allocator.h" 41#include "llvm/Support/CommandLine.h" 42#include "llvm/Support/Debug.h" 43#include "llvm/Support/IRBuilder.h" 44#include "llvm/Support/PatternMatch.h" 45using namespace llvm; 46using namespace PatternMatch; 47 48STATISTIC(NumGVNInstr, "Number of instructions deleted"); 49STATISTIC(NumGVNLoad, "Number of loads deleted"); 50STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 51STATISTIC(NumGVNBlocks, "Number of blocks merged"); 52STATISTIC(NumGVNSimpl, "Number of instructions simplified"); 53STATISTIC(NumGVNEqProp, "Number of equalities propagated"); 54STATISTIC(NumPRELoad, "Number of loads PRE'd"); 55 56static cl::opt<bool> EnablePRE("enable-pre", 57 cl::init(true), cl::Hidden); 58static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); 59 60//===----------------------------------------------------------------------===// 61// ValueTable Class 62//===----------------------------------------------------------------------===// 63 64/// This class holds the mapping between values and value numbers. It is used 65/// as an efficient mechanism to determine the expression-wise equivalence of 66/// two values. 67namespace { 68 struct Expression { 69 uint32_t opcode; 70 Type *type; 71 SmallVector<uint32_t, 4> varargs; 72 73 Expression(uint32_t o = ~2U) : opcode(o) { } 74 75 bool operator==(const Expression &other) const { 76 if (opcode != other.opcode) 77 return false; 78 if (opcode == ~0U || opcode == ~1U) 79 return true; 80 if (type != other.type) 81 return false; 82 if (varargs != other.varargs) 83 return false; 84 return true; 85 } 86 }; 87 88 class ValueTable { 89 DenseMap<Value*, uint32_t> valueNumbering; 90 DenseMap<Expression, uint32_t> expressionNumbering; 91 AliasAnalysis *AA; 92 MemoryDependenceAnalysis *MD; 93 DominatorTree *DT; 94 95 uint32_t nextValueNumber; 96 97 Expression create_expression(Instruction* I); 98 Expression create_extractvalue_expression(ExtractValueInst* EI); 99 uint32_t lookup_or_add_call(CallInst* C); 100 public: 101 ValueTable() : nextValueNumber(1) { } 102 uint32_t lookup_or_add(Value *V); 103 uint32_t lookup(Value *V) const; 104 void add(Value *V, uint32_t num); 105 void clear(); 106 void erase(Value *v); 107 void setAliasAnalysis(AliasAnalysis* A) { AA = A; } 108 AliasAnalysis *getAliasAnalysis() const { return AA; } 109 void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } 110 void setDomTree(DominatorTree* D) { DT = D; } 111 uint32_t getNextUnusedValueNumber() { return nextValueNumber; } 112 void verifyRemoved(const Value *) const; 113 }; 114} 115 116namespace llvm { 117template <> struct DenseMapInfo<Expression> { 118 static inline Expression getEmptyKey() { 119 return ~0U; 120 } 121 122 static inline Expression getTombstoneKey() { 123 return ~1U; 124 } 125 126 static unsigned getHashValue(const Expression e) { 127 unsigned hash = e.opcode; 128 129 hash = ((unsigned)((uintptr_t)e.type >> 4) ^ 130 (unsigned)((uintptr_t)e.type >> 9)); 131 132 for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), 133 E = e.varargs.end(); I != E; ++I) 134 hash = *I + hash * 37; 135 136 return hash; 137 } 138 static bool isEqual(const Expression &LHS, const Expression &RHS) { 139 return LHS == RHS; 140 } 141}; 142 143} 144 145//===----------------------------------------------------------------------===// 146// ValueTable Internal Functions 147//===----------------------------------------------------------------------===// 148 149Expression ValueTable::create_expression(Instruction *I) { 150 Expression e; 151 e.type = I->getType(); 152 e.opcode = I->getOpcode(); 153 for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); 154 OI != OE; ++OI) 155 e.varargs.push_back(lookup_or_add(*OI)); 156 157 if (CmpInst *C = dyn_cast<CmpInst>(I)) { 158 e.opcode = (C->getOpcode() << 8) | C->getPredicate(); 159 } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { 160 for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); 161 II != IE; ++II) 162 e.varargs.push_back(*II); 163 } 164 165 return e; 166} 167 168Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { 169 assert(EI != 0 && "Not an ExtractValueInst?"); 170 Expression e; 171 e.type = EI->getType(); 172 e.opcode = 0; 173 174 IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); 175 if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { 176 // EI might be an extract from one of our recognised intrinsics. If it 177 // is we'll synthesize a semantically equivalent expression instead on 178 // an extract value expression. 179 switch (I->getIntrinsicID()) { 180 case Intrinsic::sadd_with_overflow: 181 case Intrinsic::uadd_with_overflow: 182 e.opcode = Instruction::Add; 183 break; 184 case Intrinsic::ssub_with_overflow: 185 case Intrinsic::usub_with_overflow: 186 e.opcode = Instruction::Sub; 187 break; 188 case Intrinsic::smul_with_overflow: 189 case Intrinsic::umul_with_overflow: 190 e.opcode = Instruction::Mul; 191 break; 192 default: 193 break; 194 } 195 196 if (e.opcode != 0) { 197 // Intrinsic recognized. Grab its args to finish building the expression. 198 assert(I->getNumArgOperands() == 2 && 199 "Expect two args for recognised intrinsics."); 200 e.varargs.push_back(lookup_or_add(I->getArgOperand(0))); 201 e.varargs.push_back(lookup_or_add(I->getArgOperand(1))); 202 return e; 203 } 204 } 205 206 // Not a recognised intrinsic. Fall back to producing an extract value 207 // expression. 208 e.opcode = EI->getOpcode(); 209 for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); 210 OI != OE; ++OI) 211 e.varargs.push_back(lookup_or_add(*OI)); 212 213 for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); 214 II != IE; ++II) 215 e.varargs.push_back(*II); 216 217 return e; 218} 219 220//===----------------------------------------------------------------------===// 221// ValueTable External Functions 222//===----------------------------------------------------------------------===// 223 224/// add - Insert a value into the table with a specified value number. 225void ValueTable::add(Value *V, uint32_t num) { 226 valueNumbering.insert(std::make_pair(V, num)); 227} 228 229uint32_t ValueTable::lookup_or_add_call(CallInst* C) { 230 if (AA->doesNotAccessMemory(C)) { 231 Expression exp = create_expression(C); 232 uint32_t& e = expressionNumbering[exp]; 233 if (!e) e = nextValueNumber++; 234 valueNumbering[C] = e; 235 return e; 236 } else if (AA->onlyReadsMemory(C)) { 237 Expression exp = create_expression(C); 238 uint32_t& e = expressionNumbering[exp]; 239 if (!e) { 240 e = nextValueNumber++; 241 valueNumbering[C] = e; 242 return e; 243 } 244 if (!MD) { 245 e = nextValueNumber++; 246 valueNumbering[C] = e; 247 return e; 248 } 249 250 MemDepResult local_dep = MD->getDependency(C); 251 252 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 253 valueNumbering[C] = nextValueNumber; 254 return nextValueNumber++; 255 } 256 257 if (local_dep.isDef()) { 258 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 259 260 if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { 261 valueNumbering[C] = nextValueNumber; 262 return nextValueNumber++; 263 } 264 265 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 266 uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); 267 uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i)); 268 if (c_vn != cd_vn) { 269 valueNumbering[C] = nextValueNumber; 270 return nextValueNumber++; 271 } 272 } 273 274 uint32_t v = lookup_or_add(local_cdep); 275 valueNumbering[C] = v; 276 return v; 277 } 278 279 // Non-local case. 280 const MemoryDependenceAnalysis::NonLocalDepInfo &deps = 281 MD->getNonLocalCallDependency(CallSite(C)); 282 // FIXME: Move the checking logic to MemDep! 283 CallInst* cdep = 0; 284 285 // Check to see if we have a single dominating call instruction that is 286 // identical to C. 287 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 288 const NonLocalDepEntry *I = &deps[i]; 289 if (I->getResult().isNonLocal()) 290 continue; 291 292 // We don't handle non-definitions. If we already have a call, reject 293 // instruction dependencies. 294 if (!I->getResult().isDef() || cdep != 0) { 295 cdep = 0; 296 break; 297 } 298 299 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); 300 // FIXME: All duplicated with non-local case. 301 if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ 302 cdep = NonLocalDepCall; 303 continue; 304 } 305 306 cdep = 0; 307 break; 308 } 309 310 if (!cdep) { 311 valueNumbering[C] = nextValueNumber; 312 return nextValueNumber++; 313 } 314 315 if (cdep->getNumArgOperands() != C->getNumArgOperands()) { 316 valueNumbering[C] = nextValueNumber; 317 return nextValueNumber++; 318 } 319 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 320 uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); 321 uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i)); 322 if (c_vn != cd_vn) { 323 valueNumbering[C] = nextValueNumber; 324 return nextValueNumber++; 325 } 326 } 327 328 uint32_t v = lookup_or_add(cdep); 329 valueNumbering[C] = v; 330 return v; 331 332 } else { 333 valueNumbering[C] = nextValueNumber; 334 return nextValueNumber++; 335 } 336} 337 338/// lookup_or_add - Returns the value number for the specified value, assigning 339/// it a new number if it did not have one before. 340uint32_t ValueTable::lookup_or_add(Value *V) { 341 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 342 if (VI != valueNumbering.end()) 343 return VI->second; 344 345 if (!isa<Instruction>(V)) { 346 valueNumbering[V] = nextValueNumber; 347 return nextValueNumber++; 348 } 349 350 Instruction* I = cast<Instruction>(V); 351 Expression exp; 352 switch (I->getOpcode()) { 353 case Instruction::Call: 354 return lookup_or_add_call(cast<CallInst>(I)); 355 case Instruction::Add: 356 case Instruction::FAdd: 357 case Instruction::Sub: 358 case Instruction::FSub: 359 case Instruction::Mul: 360 case Instruction::FMul: 361 case Instruction::UDiv: 362 case Instruction::SDiv: 363 case Instruction::FDiv: 364 case Instruction::URem: 365 case Instruction::SRem: 366 case Instruction::FRem: 367 case Instruction::Shl: 368 case Instruction::LShr: 369 case Instruction::AShr: 370 case Instruction::And: 371 case Instruction::Or : 372 case Instruction::Xor: 373 case Instruction::ICmp: 374 case Instruction::FCmp: 375 case Instruction::Trunc: 376 case Instruction::ZExt: 377 case Instruction::SExt: 378 case Instruction::FPToUI: 379 case Instruction::FPToSI: 380 case Instruction::UIToFP: 381 case Instruction::SIToFP: 382 case Instruction::FPTrunc: 383 case Instruction::FPExt: 384 case Instruction::PtrToInt: 385 case Instruction::IntToPtr: 386 case Instruction::BitCast: 387 case Instruction::Select: 388 case Instruction::ExtractElement: 389 case Instruction::InsertElement: 390 case Instruction::ShuffleVector: 391 case Instruction::InsertValue: 392 case Instruction::GetElementPtr: 393 exp = create_expression(I); 394 break; 395 case Instruction::ExtractValue: 396 exp = create_extractvalue_expression(cast<ExtractValueInst>(I)); 397 break; 398 default: 399 valueNumbering[V] = nextValueNumber; 400 return nextValueNumber++; 401 } 402 403 uint32_t& e = expressionNumbering[exp]; 404 if (!e) e = nextValueNumber++; 405 valueNumbering[V] = e; 406 return e; 407} 408 409/// lookup - Returns the value number of the specified value. Fails if 410/// the value has not yet been numbered. 411uint32_t ValueTable::lookup(Value *V) const { 412 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); 413 assert(VI != valueNumbering.end() && "Value not numbered?"); 414 return VI->second; 415} 416 417/// clear - Remove all entries from the ValueTable. 418void ValueTable::clear() { 419 valueNumbering.clear(); 420 expressionNumbering.clear(); 421 nextValueNumber = 1; 422} 423 424/// erase - Remove a value from the value numbering. 425void ValueTable::erase(Value *V) { 426 valueNumbering.erase(V); 427} 428 429/// verifyRemoved - Verify that the value is removed from all internal data 430/// structures. 431void ValueTable::verifyRemoved(const Value *V) const { 432 for (DenseMap<Value*, uint32_t>::const_iterator 433 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 434 assert(I->first != V && "Inst still occurs in value numbering map!"); 435 } 436} 437 438//===----------------------------------------------------------------------===// 439// GVN Pass 440//===----------------------------------------------------------------------===// 441 442namespace { 443 444 class GVN : public FunctionPass { 445 bool NoLoads; 446 MemoryDependenceAnalysis *MD; 447 DominatorTree *DT; 448 const TargetData *TD; 449 450 ValueTable VN; 451 452 /// LeaderTable - A mapping from value numbers to lists of Value*'s that 453 /// have that value number. Use findLeader to query it. 454 struct LeaderTableEntry { 455 Value *Val; 456 BasicBlock *BB; 457 LeaderTableEntry *Next; 458 }; 459 DenseMap<uint32_t, LeaderTableEntry> LeaderTable; 460 BumpPtrAllocator TableAllocator; 461 462 SmallVector<Instruction*, 8> InstrsToErase; 463 public: 464 static char ID; // Pass identification, replacement for typeid 465 explicit GVN(bool noloads = false) 466 : FunctionPass(ID), NoLoads(noloads), MD(0) { 467 initializeGVNPass(*PassRegistry::getPassRegistry()); 468 } 469 470 bool runOnFunction(Function &F); 471 472 /// markInstructionForDeletion - This removes the specified instruction from 473 /// our various maps and marks it for deletion. 474 void markInstructionForDeletion(Instruction *I) { 475 VN.erase(I); 476 InstrsToErase.push_back(I); 477 } 478 479 const TargetData *getTargetData() const { return TD; } 480 DominatorTree &getDominatorTree() const { return *DT; } 481 AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } 482 MemoryDependenceAnalysis &getMemDep() const { return *MD; } 483 private: 484 /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for 485 /// its value number. 486 void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { 487 LeaderTableEntry &Curr = LeaderTable[N]; 488 if (!Curr.Val) { 489 Curr.Val = V; 490 Curr.BB = BB; 491 return; 492 } 493 494 LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); 495 Node->Val = V; 496 Node->BB = BB; 497 Node->Next = Curr.Next; 498 Curr.Next = Node; 499 } 500 501 /// removeFromLeaderTable - Scan the list of values corresponding to a given 502 /// value number, and remove the given value if encountered. 503 void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { 504 LeaderTableEntry* Prev = 0; 505 LeaderTableEntry* Curr = &LeaderTable[N]; 506 507 while (Curr->Val != V || Curr->BB != BB) { 508 Prev = Curr; 509 Curr = Curr->Next; 510 } 511 512 if (Prev) { 513 Prev->Next = Curr->Next; 514 } else { 515 if (!Curr->Next) { 516 Curr->Val = 0; 517 Curr->BB = 0; 518 } else { 519 LeaderTableEntry* Next = Curr->Next; 520 Curr->Val = Next->Val; 521 Curr->BB = Next->BB; 522 Curr->Next = Next->Next; 523 } 524 } 525 } 526 527 // List of critical edges to be split between iterations. 528 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; 529 530 // This transformation requires dominator postdominator info 531 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 532 AU.addRequired<DominatorTree>(); 533 if (!NoLoads) 534 AU.addRequired<MemoryDependenceAnalysis>(); 535 AU.addRequired<AliasAnalysis>(); 536 537 AU.addPreserved<DominatorTree>(); 538 AU.addPreserved<AliasAnalysis>(); 539 } 540 541 542 // Helper fuctions 543 // FIXME: eliminate or document these better 544 bool processLoad(LoadInst *L); 545 bool processInstruction(Instruction *I); 546 bool processNonLocalLoad(LoadInst *L); 547 bool processBlock(BasicBlock *BB); 548 void dump(DenseMap<uint32_t, Value*> &d); 549 bool iterateOnFunction(Function &F); 550 bool performPRE(Function &F); 551 Value *findLeader(BasicBlock *BB, uint32_t num); 552 void cleanupGlobalSets(); 553 void verifyRemoved(const Instruction *I) const; 554 bool splitCriticalEdges(); 555 unsigned replaceAllDominatedUsesWith(Value *From, Value *To, 556 BasicBlock *Root); 557 bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); 558 }; 559 560 char GVN::ID = 0; 561} 562 563// createGVNPass - The public interface to this file... 564FunctionPass *llvm::createGVNPass(bool NoLoads) { 565 return new GVN(NoLoads); 566} 567 568INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) 569INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 570INITIALIZE_PASS_DEPENDENCY(DominatorTree) 571INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 572INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) 573 574void GVN::dump(DenseMap<uint32_t, Value*>& d) { 575 errs() << "{\n"; 576 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 577 E = d.end(); I != E; ++I) { 578 errs() << I->first << "\n"; 579 I->second->dump(); 580 } 581 errs() << "}\n"; 582} 583 584/// IsValueFullyAvailableInBlock - Return true if we can prove that the value 585/// we're analyzing is fully available in the specified block. As we go, keep 586/// track of which blocks we know are fully alive in FullyAvailableBlocks. This 587/// map is actually a tri-state map with the following values: 588/// 0) we know the block *is not* fully available. 589/// 1) we know the block *is* fully available. 590/// 2) we do not know whether the block is fully available or not, but we are 591/// currently speculating that it will be. 592/// 3) we are speculating for this block and have used that to speculate for 593/// other blocks. 594static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 595 DenseMap<BasicBlock*, char> &FullyAvailableBlocks) { 596 // Optimistically assume that the block is fully available and check to see 597 // if we already know about this block in one lookup. 598 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 599 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 600 601 // If the entry already existed for this block, return the precomputed value. 602 if (!IV.second) { 603 // If this is a speculative "available" value, mark it as being used for 604 // speculation of other blocks. 605 if (IV.first->second == 2) 606 IV.first->second = 3; 607 return IV.first->second != 0; 608 } 609 610 // Otherwise, see if it is fully available in all predecessors. 611 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 612 613 // If this block has no predecessors, it isn't live-in here. 614 if (PI == PE) 615 goto SpeculationFailure; 616 617 for (; PI != PE; ++PI) 618 // If the value isn't fully available in one of our predecessors, then it 619 // isn't fully available in this block either. Undo our previous 620 // optimistic assumption and bail out. 621 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 622 goto SpeculationFailure; 623 624 return true; 625 626// SpeculationFailure - If we get here, we found out that this is not, after 627// all, a fully-available block. We have a problem if we speculated on this and 628// used the speculation to mark other blocks as available. 629SpeculationFailure: 630 char &BBVal = FullyAvailableBlocks[BB]; 631 632 // If we didn't speculate on this, just return with it set to false. 633 if (BBVal == 2) { 634 BBVal = 0; 635 return false; 636 } 637 638 // If we did speculate on this value, we could have blocks set to 1 that are 639 // incorrect. Walk the (transitive) successors of this block and mark them as 640 // 0 if set to one. 641 SmallVector<BasicBlock*, 32> BBWorklist; 642 BBWorklist.push_back(BB); 643 644 do { 645 BasicBlock *Entry = BBWorklist.pop_back_val(); 646 // Note that this sets blocks to 0 (unavailable) if they happen to not 647 // already be in FullyAvailableBlocks. This is safe. 648 char &EntryVal = FullyAvailableBlocks[Entry]; 649 if (EntryVal == 0) continue; // Already unavailable. 650 651 // Mark as unavailable. 652 EntryVal = 0; 653 654 for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) 655 BBWorklist.push_back(*I); 656 } while (!BBWorklist.empty()); 657 658 return false; 659} 660 661 662/// CanCoerceMustAliasedValueToLoad - Return true if 663/// CoerceAvailableValueToLoadType will succeed. 664static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, 665 Type *LoadTy, 666 const TargetData &TD) { 667 // If the loaded or stored value is an first class array or struct, don't try 668 // to transform them. We need to be able to bitcast to integer. 669 if (LoadTy->isStructTy() || LoadTy->isArrayTy() || 670 StoredVal->getType()->isStructTy() || 671 StoredVal->getType()->isArrayTy()) 672 return false; 673 674 // The store has to be at least as big as the load. 675 if (TD.getTypeSizeInBits(StoredVal->getType()) < 676 TD.getTypeSizeInBits(LoadTy)) 677 return false; 678 679 return true; 680} 681 682 683/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and 684/// then a load from a must-aliased pointer of a different type, try to coerce 685/// the stored value. LoadedTy is the type of the load we want to replace and 686/// InsertPt is the place to insert new instructions. 687/// 688/// If we can't do it, return null. 689static Value *CoerceAvailableValueToLoadType(Value *StoredVal, 690 Type *LoadedTy, 691 Instruction *InsertPt, 692 const TargetData &TD) { 693 if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) 694 return 0; 695 696 // If this is already the right type, just return it. 697 Type *StoredValTy = StoredVal->getType(); 698 699 uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); 700 uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); 701 702 // If the store and reload are the same size, we can always reuse it. 703 if (StoreSize == LoadSize) { 704 // Pointer to Pointer -> use bitcast. 705 if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) 706 return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); 707 708 // Convert source pointers to integers, which can be bitcast. 709 if (StoredValTy->isPointerTy()) { 710 StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); 711 StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); 712 } 713 714 Type *TypeToCastTo = LoadedTy; 715 if (TypeToCastTo->isPointerTy()) 716 TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); 717 718 if (StoredValTy != TypeToCastTo) 719 StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); 720 721 // Cast to pointer if the load needs a pointer type. 722 if (LoadedTy->isPointerTy()) 723 StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); 724 725 return StoredVal; 726 } 727 728 // If the loaded value is smaller than the available value, then we can 729 // extract out a piece from it. If the available value is too small, then we 730 // can't do anything. 731 assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); 732 733 // Convert source pointers to integers, which can be manipulated. 734 if (StoredValTy->isPointerTy()) { 735 StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); 736 StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); 737 } 738 739 // Convert vectors and fp to integer, which can be manipulated. 740 if (!StoredValTy->isIntegerTy()) { 741 StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); 742 StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); 743 } 744 745 // If this is a big-endian system, we need to shift the value down to the low 746 // bits so that a truncate will work. 747 if (TD.isBigEndian()) { 748 Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); 749 StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); 750 } 751 752 // Truncate the integer to the right size now. 753 Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); 754 StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); 755 756 if (LoadedTy == NewIntTy) 757 return StoredVal; 758 759 // If the result is a pointer, inttoptr. 760 if (LoadedTy->isPointerTy()) 761 return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); 762 763 // Otherwise, bitcast. 764 return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); 765} 766 767/// AnalyzeLoadFromClobberingWrite - This function is called when we have a 768/// memdep query of a load that ends up being a clobbering memory write (store, 769/// memset, memcpy, memmove). This means that the write *may* provide bits used 770/// by the load but we can't be sure because the pointers don't mustalias. 771/// 772/// Check this case to see if there is anything more we can do before we give 773/// up. This returns -1 if we have to give up, or a byte number in the stored 774/// value of the piece that feeds the load. 775static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, 776 Value *WritePtr, 777 uint64_t WriteSizeInBits, 778 const TargetData &TD) { 779 // If the loaded or stored value is an first class array or struct, don't try 780 // to transform them. We need to be able to bitcast to integer. 781 if (LoadTy->isStructTy() || LoadTy->isArrayTy()) 782 return -1; 783 784 int64_t StoreOffset = 0, LoadOffset = 0; 785 Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); 786 Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); 787 if (StoreBase != LoadBase) 788 return -1; 789 790 // If the load and store are to the exact same address, they should have been 791 // a must alias. AA must have gotten confused. 792 // FIXME: Study to see if/when this happens. One case is forwarding a memset 793 // to a load from the base of the memset. 794#if 0 795 if (LoadOffset == StoreOffset) { 796 dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" 797 << "Base = " << *StoreBase << "\n" 798 << "Store Ptr = " << *WritePtr << "\n" 799 << "Store Offs = " << StoreOffset << "\n" 800 << "Load Ptr = " << *LoadPtr << "\n"; 801 abort(); 802 } 803#endif 804 805 // If the load and store don't overlap at all, the store doesn't provide 806 // anything to the load. In this case, they really don't alias at all, AA 807 // must have gotten confused. 808 uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); 809 810 if ((WriteSizeInBits & 7) | (LoadSize & 7)) 811 return -1; 812 uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. 813 LoadSize >>= 3; 814 815 816 bool isAAFailure = false; 817 if (StoreOffset < LoadOffset) 818 isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; 819 else 820 isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; 821 822 if (isAAFailure) { 823#if 0 824 dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" 825 << "Base = " << *StoreBase << "\n" 826 << "Store Ptr = " << *WritePtr << "\n" 827 << "Store Offs = " << StoreOffset << "\n" 828 << "Load Ptr = " << *LoadPtr << "\n"; 829 abort(); 830#endif 831 return -1; 832 } 833 834 // If the Load isn't completely contained within the stored bits, we don't 835 // have all the bits to feed it. We could do something crazy in the future 836 // (issue a smaller load then merge the bits in) but this seems unlikely to be 837 // valuable. 838 if (StoreOffset > LoadOffset || 839 StoreOffset+StoreSize < LoadOffset+LoadSize) 840 return -1; 841 842 // Okay, we can do this transformation. Return the number of bytes into the 843 // store that the load is. 844 return LoadOffset-StoreOffset; 845} 846 847/// AnalyzeLoadFromClobberingStore - This function is called when we have a 848/// memdep query of a load that ends up being a clobbering store. 849static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, 850 StoreInst *DepSI, 851 const TargetData &TD) { 852 // Cannot handle reading from store of first-class aggregate yet. 853 if (DepSI->getValueOperand()->getType()->isStructTy() || 854 DepSI->getValueOperand()->getType()->isArrayTy()) 855 return -1; 856 857 Value *StorePtr = DepSI->getPointerOperand(); 858 uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType()); 859 return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, 860 StorePtr, StoreSize, TD); 861} 862 863/// AnalyzeLoadFromClobberingLoad - This function is called when we have a 864/// memdep query of a load that ends up being clobbered by another load. See if 865/// the other load can feed into the second load. 866static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, 867 LoadInst *DepLI, const TargetData &TD){ 868 // Cannot handle reading from store of first-class aggregate yet. 869 if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) 870 return -1; 871 872 Value *DepPtr = DepLI->getPointerOperand(); 873 uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); 874 int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); 875 if (R != -1) return R; 876 877 // If we have a load/load clobber an DepLI can be widened to cover this load, 878 // then we should widen it! 879 int64_t LoadOffs = 0; 880 const Value *LoadBase = 881 GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); 882 unsigned LoadSize = TD.getTypeStoreSize(LoadTy); 883 884 unsigned Size = MemoryDependenceAnalysis:: 885 getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); 886 if (Size == 0) return -1; 887 888 return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); 889} 890 891 892 893static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, 894 MemIntrinsic *MI, 895 const TargetData &TD) { 896 // If the mem operation is a non-constant size, we can't handle it. 897 ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); 898 if (SizeCst == 0) return -1; 899 uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; 900 901 // If this is memset, we just need to see if the offset is valid in the size 902 // of the memset.. 903 if (MI->getIntrinsicID() == Intrinsic::memset) 904 return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), 905 MemSizeInBits, TD); 906 907 // If we have a memcpy/memmove, the only case we can handle is if this is a 908 // copy from constant memory. In that case, we can read directly from the 909 // constant memory. 910 MemTransferInst *MTI = cast<MemTransferInst>(MI); 911 912 Constant *Src = dyn_cast<Constant>(MTI->getSource()); 913 if (Src == 0) return -1; 914 915 GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD)); 916 if (GV == 0 || !GV->isConstant()) return -1; 917 918 // See if the access is within the bounds of the transfer. 919 int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, 920 MI->getDest(), MemSizeInBits, TD); 921 if (Offset == -1) 922 return Offset; 923 924 // Otherwise, see if we can constant fold a load from the constant with the 925 // offset applied as appropriate. 926 Src = ConstantExpr::getBitCast(Src, 927 llvm::Type::getInt8PtrTy(Src->getContext())); 928 Constant *OffsetCst = 929 ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); 930 Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); 931 Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); 932 if (ConstantFoldLoadFromConstPtr(Src, &TD)) 933 return Offset; 934 return -1; 935} 936 937 938/// GetStoreValueForLoad - This function is called when we have a 939/// memdep query of a load that ends up being a clobbering store. This means 940/// that the store provides bits used by the load but we the pointers don't 941/// mustalias. Check this case to see if there is anything more we can do 942/// before we give up. 943static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, 944 Type *LoadTy, 945 Instruction *InsertPt, const TargetData &TD){ 946 LLVMContext &Ctx = SrcVal->getType()->getContext(); 947 948 uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; 949 uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; 950 951 IRBuilder<> Builder(InsertPt->getParent(), InsertPt); 952 953 // Compute which bits of the stored value are being used by the load. Convert 954 // to an integer type to start with. 955 if (SrcVal->getType()->isPointerTy()) 956 SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx)); 957 if (!SrcVal->getType()->isIntegerTy()) 958 SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); 959 960 // Shift the bits to the least significant depending on endianness. 961 unsigned ShiftAmt; 962 if (TD.isLittleEndian()) 963 ShiftAmt = Offset*8; 964 else 965 ShiftAmt = (StoreSize-LoadSize-Offset)*8; 966 967 if (ShiftAmt) 968 SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); 969 970 if (LoadSize != StoreSize) 971 SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); 972 973 return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); 974} 975 976/// GetStoreValueForLoad - This function is called when we have a 977/// memdep query of a load that ends up being a clobbering load. This means 978/// that the load *may* provide bits used by the load but we can't be sure 979/// because the pointers don't mustalias. Check this case to see if there is 980/// anything more we can do before we give up. 981static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, 982 Type *LoadTy, Instruction *InsertPt, 983 GVN &gvn) { 984 const TargetData &TD = *gvn.getTargetData(); 985 // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to 986 // widen SrcVal out to a larger load. 987 unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); 988 unsigned LoadSize = TD.getTypeStoreSize(LoadTy); 989 if (Offset+LoadSize > SrcValSize) { 990 assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); 991 assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); 992 // If we have a load/load clobber an DepLI can be widened to cover this 993 // load, then we should widen it to the next power of 2 size big enough! 994 unsigned NewLoadSize = Offset+LoadSize; 995 if (!isPowerOf2_32(NewLoadSize)) 996 NewLoadSize = NextPowerOf2(NewLoadSize); 997 998 Value *PtrVal = SrcVal->getPointerOperand(); 999 1000 // Insert the new load after the old load. This ensures that subsequent 1001 // memdep queries will find the new load. We can't easily remove the old 1002 // load completely because it is already in the value numbering table. 1003 IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); 1004 Type *DestPTy = 1005 IntegerType::get(LoadTy->getContext(), NewLoadSize*8); 1006 DestPTy = PointerType::get(DestPTy, 1007 cast<PointerType>(PtrVal->getType())->getAddressSpace()); 1008 Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); 1009 PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); 1010 LoadInst *NewLoad = Builder.CreateLoad(PtrVal); 1011 NewLoad->takeName(SrcVal); 1012 NewLoad->setAlignment(SrcVal->getAlignment()); 1013 1014 DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); 1015 DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); 1016 1017 // Replace uses of the original load with the wider load. On a big endian 1018 // system, we need to shift down to get the relevant bits. 1019 Value *RV = NewLoad; 1020 if (TD.isBigEndian()) 1021 RV = Builder.CreateLShr(RV, 1022 NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); 1023 RV = Builder.CreateTrunc(RV, SrcVal->getType()); 1024 SrcVal->replaceAllUsesWith(RV); 1025 1026 // We would like to use gvn.markInstructionForDeletion here, but we can't 1027 // because the load is already memoized into the leader map table that GVN 1028 // tracks. It is potentially possible to remove the load from the table, 1029 // but then there all of the operations based on it would need to be 1030 // rehashed. Just leave the dead load around. 1031 gvn.getMemDep().removeInstruction(SrcVal); 1032 SrcVal = NewLoad; 1033 } 1034 1035 return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); 1036} 1037 1038 1039/// GetMemInstValueForLoad - This function is called when we have a 1040/// memdep query of a load that ends up being a clobbering mem intrinsic. 1041static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, 1042 Type *LoadTy, Instruction *InsertPt, 1043 const TargetData &TD){ 1044 LLVMContext &Ctx = LoadTy->getContext(); 1045 uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; 1046 1047 IRBuilder<> Builder(InsertPt->getParent(), InsertPt); 1048 1049 // We know that this method is only called when the mem transfer fully 1050 // provides the bits for the load. 1051 if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { 1052 // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and 1053 // independently of what the offset is. 1054 Value *Val = MSI->getValue(); 1055 if (LoadSize != 1) 1056 Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); 1057 1058 Value *OneElt = Val; 1059 1060 // Splat the value out to the right number of bits. 1061 for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { 1062 // If we can double the number of bytes set, do it. 1063 if (NumBytesSet*2 <= LoadSize) { 1064 Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); 1065 Val = Builder.CreateOr(Val, ShVal); 1066 NumBytesSet <<= 1; 1067 continue; 1068 } 1069 1070 // Otherwise insert one byte at a time. 1071 Value *ShVal = Builder.CreateShl(Val, 1*8); 1072 Val = Builder.CreateOr(OneElt, ShVal); 1073 ++NumBytesSet; 1074 } 1075 1076 return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); 1077 } 1078 1079 // Otherwise, this is a memcpy/memmove from a constant global. 1080 MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); 1081 Constant *Src = cast<Constant>(MTI->getSource()); 1082 1083 // Otherwise, see if we can constant fold a load from the constant with the 1084 // offset applied as appropriate. 1085 Src = ConstantExpr::getBitCast(Src, 1086 llvm::Type::getInt8PtrTy(Src->getContext())); 1087 Constant *OffsetCst = 1088 ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); 1089 Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); 1090 Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); 1091 return ConstantFoldLoadFromConstPtr(Src, &TD); 1092} 1093 1094namespace { 1095 1096struct AvailableValueInBlock { 1097 /// BB - The basic block in question. 1098 BasicBlock *BB; 1099 enum ValType { 1100 SimpleVal, // A simple offsetted value that is accessed. 1101 LoadVal, // A value produced by a load. 1102 MemIntrin // A memory intrinsic which is loaded from. 1103 }; 1104 1105 /// V - The value that is live out of the block. 1106 PointerIntPair<Value *, 2, ValType> Val; 1107 1108 /// Offset - The byte offset in Val that is interesting for the load query. 1109 unsigned Offset; 1110 1111 static AvailableValueInBlock get(BasicBlock *BB, Value *V, 1112 unsigned Offset = 0) { 1113 AvailableValueInBlock Res; 1114 Res.BB = BB; 1115 Res.Val.setPointer(V); 1116 Res.Val.setInt(SimpleVal); 1117 Res.Offset = Offset; 1118 return Res; 1119 } 1120 1121 static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, 1122 unsigned Offset = 0) { 1123 AvailableValueInBlock Res; 1124 Res.BB = BB; 1125 Res.Val.setPointer(MI); 1126 Res.Val.setInt(MemIntrin); 1127 Res.Offset = Offset; 1128 return Res; 1129 } 1130 1131 static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, 1132 unsigned Offset = 0) { 1133 AvailableValueInBlock Res; 1134 Res.BB = BB; 1135 Res.Val.setPointer(LI); 1136 Res.Val.setInt(LoadVal); 1137 Res.Offset = Offset; 1138 return Res; 1139 } 1140 1141 bool isSimpleValue() const { return Val.getInt() == SimpleVal; } 1142 bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } 1143 bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } 1144 1145 Value *getSimpleValue() const { 1146 assert(isSimpleValue() && "Wrong accessor"); 1147 return Val.getPointer(); 1148 } 1149 1150 LoadInst *getCoercedLoadValue() const { 1151 assert(isCoercedLoadValue() && "Wrong accessor"); 1152 return cast<LoadInst>(Val.getPointer()); 1153 } 1154 1155 MemIntrinsic *getMemIntrinValue() const { 1156 assert(isMemIntrinValue() && "Wrong accessor"); 1157 return cast<MemIntrinsic>(Val.getPointer()); 1158 } 1159 1160 /// MaterializeAdjustedValue - Emit code into this block to adjust the value 1161 /// defined here to the specified type. This handles various coercion cases. 1162 Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { 1163 Value *Res; 1164 if (isSimpleValue()) { 1165 Res = getSimpleValue(); 1166 if (Res->getType() != LoadTy) { 1167 const TargetData *TD = gvn.getTargetData(); 1168 assert(TD && "Need target data to handle type mismatch case"); 1169 Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), 1170 *TD); 1171 1172 DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " 1173 << *getSimpleValue() << '\n' 1174 << *Res << '\n' << "\n\n\n"); 1175 } 1176 } else if (isCoercedLoadValue()) { 1177 LoadInst *Load = getCoercedLoadValue(); 1178 if (Load->getType() == LoadTy && Offset == 0) { 1179 Res = Load; 1180 } else { 1181 Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), 1182 gvn); 1183 1184 DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " 1185 << *getCoercedLoadValue() << '\n' 1186 << *Res << '\n' << "\n\n\n"); 1187 } 1188 } else { 1189 const TargetData *TD = gvn.getTargetData(); 1190 assert(TD && "Need target data to handle type mismatch case"); 1191 Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, 1192 LoadTy, BB->getTerminator(), *TD); 1193 DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset 1194 << " " << *getMemIntrinValue() << '\n' 1195 << *Res << '\n' << "\n\n\n"); 1196 } 1197 return Res; 1198 } 1199}; 1200 1201} // end anonymous namespace 1202 1203/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, 1204/// construct SSA form, allowing us to eliminate LI. This returns the value 1205/// that should be used at LI's definition site. 1206static Value *ConstructSSAForLoadSet(LoadInst *LI, 1207 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, 1208 GVN &gvn) { 1209 // Check for the fully redundant, dominating load case. In this case, we can 1210 // just use the dominating value directly. 1211 if (ValuesPerBlock.size() == 1 && 1212 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, 1213 LI->getParent())) 1214 return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); 1215 1216 // Otherwise, we have to construct SSA form. 1217 SmallVector<PHINode*, 8> NewPHIs; 1218 SSAUpdater SSAUpdate(&NewPHIs); 1219 SSAUpdate.Initialize(LI->getType(), LI->getName()); 1220 1221 Type *LoadTy = LI->getType(); 1222 1223 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { 1224 const AvailableValueInBlock &AV = ValuesPerBlock[i]; 1225 BasicBlock *BB = AV.BB; 1226 1227 if (SSAUpdate.HasValueForBlock(BB)) 1228 continue; 1229 1230 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); 1231 } 1232 1233 // Perform PHI construction. 1234 Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); 1235 1236 // If new PHI nodes were created, notify alias analysis. 1237 if (V->getType()->isPointerTy()) { 1238 AliasAnalysis *AA = gvn.getAliasAnalysis(); 1239 1240 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 1241 AA->copyValue(LI, NewPHIs[i]); 1242 1243 // Now that we've copied information to the new PHIs, scan through 1244 // them again and inform alias analysis that we've added potentially 1245 // escaping uses to any values that are operands to these PHIs. 1246 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { 1247 PHINode *P = NewPHIs[i]; 1248 for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { 1249 unsigned jj = PHINode::getOperandNumForIncomingValue(ii); 1250 AA->addEscapingUse(P->getOperandUse(jj)); 1251 } 1252 } 1253 } 1254 1255 return V; 1256} 1257 1258static bool isLifetimeStart(const Instruction *Inst) { 1259 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) 1260 return II->getIntrinsicID() == Intrinsic::lifetime_start; 1261 return false; 1262} 1263 1264/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 1265/// non-local by performing PHI construction. 1266bool GVN::processNonLocalLoad(LoadInst *LI) { 1267 // Find the non-local dependencies of the load. 1268 SmallVector<NonLocalDepResult, 64> Deps; 1269 AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); 1270 MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); 1271 //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " 1272 // << Deps.size() << *LI << '\n'); 1273 1274 // If we had to process more than one hundred blocks to find the 1275 // dependencies, this load isn't worth worrying about. Optimizing 1276 // it will be too expensive. 1277 if (Deps.size() > 100) 1278 return false; 1279 1280 // If we had a phi translation failure, we'll have a single entry which is a 1281 // clobber in the current block. Reject this early. 1282 if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) { 1283 DEBUG( 1284 dbgs() << "GVN: non-local load "; 1285 WriteAsOperand(dbgs(), LI); 1286 dbgs() << " has unknown dependencies\n"; 1287 ); 1288 return false; 1289 } 1290 1291 // Filter out useless results (non-locals, etc). Keep track of the blocks 1292 // where we have a value available in repl, also keep track of whether we see 1293 // dependencies that produce an unknown value for the load (such as a call 1294 // that could potentially clobber the load). 1295 SmallVector<AvailableValueInBlock, 16> ValuesPerBlock; 1296 SmallVector<BasicBlock*, 16> UnavailableBlocks; 1297 1298 for (unsigned i = 0, e = Deps.size(); i != e; ++i) { 1299 BasicBlock *DepBB = Deps[i].getBB(); 1300 MemDepResult DepInfo = Deps[i].getResult(); 1301 1302 if (DepInfo.isUnknown()) { 1303 UnavailableBlocks.push_back(DepBB); 1304 continue; 1305 } 1306 1307 if (DepInfo.isClobber()) { 1308 // The address being loaded in this non-local block may not be the same as 1309 // the pointer operand of the load if PHI translation occurs. Make sure 1310 // to consider the right address. 1311 Value *Address = Deps[i].getAddress(); 1312 1313 // If the dependence is to a store that writes to a superset of the bits 1314 // read by the load, we can extract the bits we need for the load from the 1315 // stored value. 1316 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { 1317 if (TD && Address) { 1318 int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, 1319 DepSI, *TD); 1320 if (Offset != -1) { 1321 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 1322 DepSI->getValueOperand(), 1323 Offset)); 1324 continue; 1325 } 1326 } 1327 } 1328 1329 // Check to see if we have something like this: 1330 // load i32* P 1331 // load i8* (P+1) 1332 // if we have this, replace the later with an extraction from the former. 1333 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { 1334 // If this is a clobber and L is the first instruction in its block, then 1335 // we have the first instruction in the entry block. 1336 if (DepLI != LI && Address && TD) { 1337 int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), 1338 LI->getPointerOperand(), 1339 DepLI, *TD); 1340 1341 if (Offset != -1) { 1342 ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, 1343 Offset)); 1344 continue; 1345 } 1346 } 1347 } 1348 1349 // If the clobbering value is a memset/memcpy/memmove, see if we can 1350 // forward a value on from it. 1351 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { 1352 if (TD && Address) { 1353 int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, 1354 DepMI, *TD); 1355 if (Offset != -1) { 1356 ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, 1357 Offset)); 1358 continue; 1359 } 1360 } 1361 } 1362 1363 UnavailableBlocks.push_back(DepBB); 1364 continue; 1365 } 1366 1367 assert(DepInfo.isDef() && "Expecting def here"); 1368 1369 Instruction *DepInst = DepInfo.getInst(); 1370 1371 // Loading the allocation -> undef. 1372 if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) || 1373 // Loading immediately after lifetime begin -> undef. 1374 isLifetimeStart(DepInst)) { 1375 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 1376 UndefValue::get(LI->getType()))); 1377 continue; 1378 } 1379 1380 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { 1381 // Reject loads and stores that are to the same address but are of 1382 // different types if we have to. 1383 if (S->getValueOperand()->getType() != LI->getType()) { 1384 // If the stored value is larger or equal to the loaded value, we can 1385 // reuse it. 1386 if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), 1387 LI->getType(), *TD)) { 1388 UnavailableBlocks.push_back(DepBB); 1389 continue; 1390 } 1391 } 1392 1393 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 1394 S->getValueOperand())); 1395 continue; 1396 } 1397 1398 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { 1399 // If the types mismatch and we can't handle it, reject reuse of the load. 1400 if (LD->getType() != LI->getType()) { 1401 // If the stored value is larger or equal to the loaded value, we can 1402 // reuse it. 1403 if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ 1404 UnavailableBlocks.push_back(DepBB); 1405 continue; 1406 } 1407 } 1408 ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); 1409 continue; 1410 } 1411 1412 UnavailableBlocks.push_back(DepBB); 1413 continue; 1414 } 1415 1416 // If we have no predecessors that produce a known value for this load, exit 1417 // early. 1418 if (ValuesPerBlock.empty()) return false; 1419 1420 // If all of the instructions we depend on produce a known value for this 1421 // load, then it is fully redundant and we can use PHI insertion to compute 1422 // its value. Insert PHIs and remove the fully redundant value now. 1423 if (UnavailableBlocks.empty()) { 1424 DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); 1425 1426 // Perform PHI construction. 1427 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 1428 LI->replaceAllUsesWith(V); 1429 1430 if (isa<PHINode>(V)) 1431 V->takeName(LI); 1432 if (V->getType()->isPointerTy()) 1433 MD->invalidateCachedPointerInfo(V); 1434 markInstructionForDeletion(LI); 1435 ++NumGVNLoad; 1436 return true; 1437 } 1438 1439 if (!EnablePRE || !EnableLoadPRE) 1440 return false; 1441 1442 // Okay, we have *some* definitions of the value. This means that the value 1443 // is available in some of our (transitive) predecessors. Lets think about 1444 // doing PRE of this load. This will involve inserting a new load into the 1445 // predecessor when it's not available. We could do this in general, but 1446 // prefer to not increase code size. As such, we only do this when we know 1447 // that we only have to insert *one* load (which means we're basically moving 1448 // the load, not inserting a new one). 1449 1450 SmallPtrSet<BasicBlock *, 4> Blockers; 1451 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 1452 Blockers.insert(UnavailableBlocks[i]); 1453 1454 // Let's find the first basic block with more than one predecessor. Walk 1455 // backwards through predecessors if needed. 1456 BasicBlock *LoadBB = LI->getParent(); 1457 BasicBlock *TmpBB = LoadBB; 1458 1459 bool isSinglePred = false; 1460 bool allSingleSucc = true; 1461 while (TmpBB->getSinglePredecessor()) { 1462 isSinglePred = true; 1463 TmpBB = TmpBB->getSinglePredecessor(); 1464 if (TmpBB == LoadBB) // Infinite (unreachable) loop. 1465 return false; 1466 if (Blockers.count(TmpBB)) 1467 return false; 1468 1469 // If any of these blocks has more than one successor (i.e. if the edge we 1470 // just traversed was critical), then there are other paths through this 1471 // block along which the load may not be anticipated. Hoisting the load 1472 // above this block would be adding the load to execution paths along 1473 // which it was not previously executed. 1474 if (TmpBB->getTerminator()->getNumSuccessors() != 1) 1475 return false; 1476 } 1477 1478 assert(TmpBB); 1479 LoadBB = TmpBB; 1480 1481 // FIXME: It is extremely unclear what this loop is doing, other than 1482 // artificially restricting loadpre. 1483 if (isSinglePred) { 1484 bool isHot = false; 1485 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { 1486 const AvailableValueInBlock &AV = ValuesPerBlock[i]; 1487 if (AV.isSimpleValue()) 1488 // "Hot" Instruction is in some loop (because it dominates its dep. 1489 // instruction). 1490 if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) 1491 if (DT->dominates(LI, I)) { 1492 isHot = true; 1493 break; 1494 } 1495 } 1496 1497 // We are interested only in "hot" instructions. We don't want to do any 1498 // mis-optimizations here. 1499 if (!isHot) 1500 return false; 1501 } 1502 1503 // Check to see how many predecessors have the loaded value fully 1504 // available. 1505 DenseMap<BasicBlock*, Value*> PredLoads; 1506 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 1507 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1508 FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; 1509 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 1510 FullyAvailableBlocks[UnavailableBlocks[i]] = false; 1511 1512 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit; 1513 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); 1514 PI != E; ++PI) { 1515 BasicBlock *Pred = *PI; 1516 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { 1517 continue; 1518 } 1519 PredLoads[Pred] = 0; 1520 1521 if (Pred->getTerminator()->getNumSuccessors() != 1) { 1522 if (isa<IndirectBrInst>(Pred->getTerminator())) { 1523 DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" 1524 << Pred->getName() << "': " << *LI << '\n'); 1525 return false; 1526 } 1527 1528 if (LoadBB->isLandingPad()) { 1529 DEBUG(dbgs() 1530 << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" 1531 << Pred->getName() << "': " << *LI << '\n'); 1532 return false; 1533 } 1534 1535 unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); 1536 NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); 1537 } 1538 } 1539 1540 if (!NeedToSplit.empty()) { 1541 toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); 1542 return false; 1543 } 1544 1545 // Decide whether PRE is profitable for this load. 1546 unsigned NumUnavailablePreds = PredLoads.size(); 1547 assert(NumUnavailablePreds != 0 && 1548 "Fully available value should be eliminated above!"); 1549 1550 // If this load is unavailable in multiple predecessors, reject it. 1551 // FIXME: If we could restructure the CFG, we could make a common pred with 1552 // all the preds that don't have an available LI and insert a new load into 1553 // that one block. 1554 if (NumUnavailablePreds != 1) 1555 return false; 1556 1557 // Check if the load can safely be moved to all the unavailable predecessors. 1558 bool CanDoPRE = true; 1559 SmallVector<Instruction*, 8> NewInsts; 1560 for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), 1561 E = PredLoads.end(); I != E; ++I) { 1562 BasicBlock *UnavailablePred = I->first; 1563 1564 // Do PHI translation to get its value in the predecessor if necessary. The 1565 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. 1566 1567 // If all preds have a single successor, then we know it is safe to insert 1568 // the load on the pred (?!?), so we can insert code to materialize the 1569 // pointer if it is not available. 1570 PHITransAddr Address(LI->getPointerOperand(), TD); 1571 Value *LoadPtr = 0; 1572 if (allSingleSucc) { 1573 LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, 1574 *DT, NewInsts); 1575 } else { 1576 Address.PHITranslateValue(LoadBB, UnavailablePred, DT); 1577 LoadPtr = Address.getAddr(); 1578 } 1579 1580 // If we couldn't find or insert a computation of this phi translated value, 1581 // we fail PRE. 1582 if (LoadPtr == 0) { 1583 DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " 1584 << *LI->getPointerOperand() << "\n"); 1585 CanDoPRE = false; 1586 break; 1587 } 1588 1589 // Make sure it is valid to move this load here. We have to watch out for: 1590 // @1 = getelementptr (i8* p, ... 1591 // test p and branch if == 0 1592 // load @1 1593 // It is valid to have the getelementptr before the test, even if p can 1594 // be 0, as getelementptr only does address arithmetic. 1595 // If we are not pushing the value through any multiple-successor blocks 1596 // we do not have this case. Otherwise, check that the load is safe to 1597 // put anywhere; this can be improved, but should be conservatively safe. 1598 if (!allSingleSucc && 1599 // FIXME: REEVALUTE THIS. 1600 !isSafeToLoadUnconditionally(LoadPtr, 1601 UnavailablePred->getTerminator(), 1602 LI->getAlignment(), TD)) { 1603 CanDoPRE = false; 1604 break; 1605 } 1606 1607 I->second = LoadPtr; 1608 } 1609 1610 if (!CanDoPRE) { 1611 while (!NewInsts.empty()) { 1612 Instruction *I = NewInsts.pop_back_val(); 1613 if (MD) MD->removeInstruction(I); 1614 I->eraseFromParent(); 1615 } 1616 return false; 1617 } 1618 1619 // Okay, we can eliminate this load by inserting a reload in the predecessor 1620 // and using PHI construction to get the value in the other predecessors, do 1621 // it. 1622 DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); 1623 DEBUG(if (!NewInsts.empty()) 1624 dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " 1625 << *NewInsts.back() << '\n'); 1626 1627 // Assign value numbers to the new instructions. 1628 for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { 1629 // FIXME: We really _ought_ to insert these value numbers into their 1630 // parent's availability map. However, in doing so, we risk getting into 1631 // ordering issues. If a block hasn't been processed yet, we would be 1632 // marking a value as AVAIL-IN, which isn't what we intend. 1633 VN.lookup_or_add(NewInsts[i]); 1634 } 1635 1636 for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), 1637 E = PredLoads.end(); I != E; ++I) { 1638 BasicBlock *UnavailablePred = I->first; 1639 Value *LoadPtr = I->second; 1640 1641 Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, 1642 LI->getAlignment(), 1643 UnavailablePred->getTerminator()); 1644 1645 // Transfer the old load's TBAA tag to the new load. 1646 if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) 1647 NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag); 1648 1649 // Transfer DebugLoc. 1650 NewLoad->setDebugLoc(LI->getDebugLoc()); 1651 1652 // Add the newly created load. 1653 ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, 1654 NewLoad)); 1655 MD->invalidateCachedPointerInfo(LoadPtr); 1656 DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); 1657 } 1658 1659 // Perform PHI construction. 1660 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 1661 LI->replaceAllUsesWith(V); 1662 if (isa<PHINode>(V)) 1663 V->takeName(LI); 1664 if (V->getType()->isPointerTy()) 1665 MD->invalidateCachedPointerInfo(V); 1666 markInstructionForDeletion(LI); 1667 ++NumPRELoad; 1668 return true; 1669} 1670 1671/// processLoad - Attempt to eliminate a load, first by eliminating it 1672/// locally, and then attempting non-local elimination if that fails. 1673bool GVN::processLoad(LoadInst *L) { 1674 if (!MD) 1675 return false; 1676 1677 if (!L->isSimple()) 1678 return false; 1679 1680 if (L->use_empty()) { 1681 markInstructionForDeletion(L); 1682 return true; 1683 } 1684 1685 // ... to a pointer that has been loaded from before... 1686 MemDepResult Dep = MD->getDependency(L); 1687 1688 // If we have a clobber and target data is around, see if this is a clobber 1689 // that we can fix up through code synthesis. 1690 if (Dep.isClobber() && TD) { 1691 // Check to see if we have something like this: 1692 // store i32 123, i32* %P 1693 // %A = bitcast i32* %P to i8* 1694 // %B = gep i8* %A, i32 1 1695 // %C = load i8* %B 1696 // 1697 // We could do that by recognizing if the clobber instructions are obviously 1698 // a common base + constant offset, and if the previous store (or memset) 1699 // completely covers this load. This sort of thing can happen in bitfield 1700 // access code. 1701 Value *AvailVal = 0; 1702 if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { 1703 int Offset = AnalyzeLoadFromClobberingStore(L->getType(), 1704 L->getPointerOperand(), 1705 DepSI, *TD); 1706 if (Offset != -1) 1707 AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, 1708 L->getType(), L, *TD); 1709 } 1710 1711 // Check to see if we have something like this: 1712 // load i32* P 1713 // load i8* (P+1) 1714 // if we have this, replace the later with an extraction from the former. 1715 if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { 1716 // If this is a clobber and L is the first instruction in its block, then 1717 // we have the first instruction in the entry block. 1718 if (DepLI == L) 1719 return false; 1720 1721 int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), 1722 L->getPointerOperand(), 1723 DepLI, *TD); 1724 if (Offset != -1) 1725 AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); 1726 } 1727 1728 // If the clobbering value is a memset/memcpy/memmove, see if we can forward 1729 // a value on from it. 1730 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { 1731 int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), 1732 L->getPointerOperand(), 1733 DepMI, *TD); 1734 if (Offset != -1) 1735 AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); 1736 } 1737 1738 if (AvailVal) { 1739 DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' 1740 << *AvailVal << '\n' << *L << "\n\n\n"); 1741 1742 // Replace the load! 1743 L->replaceAllUsesWith(AvailVal); 1744 if (AvailVal->getType()->isPointerTy()) 1745 MD->invalidateCachedPointerInfo(AvailVal); 1746 markInstructionForDeletion(L); 1747 ++NumGVNLoad; 1748 return true; 1749 } 1750 } 1751 1752 // If the value isn't available, don't do anything! 1753 if (Dep.isClobber()) { 1754 DEBUG( 1755 // fast print dep, using operator<< on instruction is too slow. 1756 dbgs() << "GVN: load "; 1757 WriteAsOperand(dbgs(), L); 1758 Instruction *I = Dep.getInst(); 1759 dbgs() << " is clobbered by " << *I << '\n'; 1760 ); 1761 return false; 1762 } 1763 1764 if (Dep.isUnknown()) { 1765 DEBUG( 1766 // fast print dep, using operator<< on instruction is too slow. 1767 dbgs() << "GVN: load "; 1768 WriteAsOperand(dbgs(), L); 1769 dbgs() << " has unknown dependence\n"; 1770 ); 1771 return false; 1772 } 1773 1774 // If it is defined in another block, try harder. 1775 if (Dep.isNonLocal()) 1776 return processNonLocalLoad(L); 1777 1778 assert(Dep.isDef() && "Expecting def here"); 1779 1780 Instruction *DepInst = Dep.getInst(); 1781 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1782 Value *StoredVal = DepSI->getValueOperand(); 1783 1784 // The store and load are to a must-aliased pointer, but they may not 1785 // actually have the same type. See if we know how to reuse the stored 1786 // value (depending on its type). 1787 if (StoredVal->getType() != L->getType()) { 1788 if (TD) { 1789 StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), 1790 L, *TD); 1791 if (StoredVal == 0) 1792 return false; 1793 1794 DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal 1795 << '\n' << *L << "\n\n\n"); 1796 } 1797 else 1798 return false; 1799 } 1800 1801 // Remove it! 1802 L->replaceAllUsesWith(StoredVal); 1803 if (StoredVal->getType()->isPointerTy()) 1804 MD->invalidateCachedPointerInfo(StoredVal); 1805 markInstructionForDeletion(L); 1806 ++NumGVNLoad; 1807 return true; 1808 } 1809 1810 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 1811 Value *AvailableVal = DepLI; 1812 1813 // The loads are of a must-aliased pointer, but they may not actually have 1814 // the same type. See if we know how to reuse the previously loaded value 1815 // (depending on its type). 1816 if (DepLI->getType() != L->getType()) { 1817 if (TD) { 1818 AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), 1819 L, *TD); 1820 if (AvailableVal == 0) 1821 return false; 1822 1823 DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal 1824 << "\n" << *L << "\n\n\n"); 1825 } 1826 else 1827 return false; 1828 } 1829 1830 // Remove it! 1831 L->replaceAllUsesWith(AvailableVal); 1832 if (DepLI->getType()->isPointerTy()) 1833 MD->invalidateCachedPointerInfo(DepLI); 1834 markInstructionForDeletion(L); 1835 ++NumGVNLoad; 1836 return true; 1837 } 1838 1839 // If this load really doesn't depend on anything, then we must be loading an 1840 // undef value. This can happen when loading for a fresh allocation with no 1841 // intervening stores, for example. 1842 if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { 1843 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1844 markInstructionForDeletion(L); 1845 ++NumGVNLoad; 1846 return true; 1847 } 1848 1849 // If this load occurs either right after a lifetime begin, 1850 // then the loaded value is undefined. 1851 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { 1852 if (II->getIntrinsicID() == Intrinsic::lifetime_start) { 1853 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1854 markInstructionForDeletion(L); 1855 ++NumGVNLoad; 1856 return true; 1857 } 1858 } 1859 1860 return false; 1861} 1862 1863// findLeader - In order to find a leader for a given value number at a 1864// specific basic block, we first obtain the list of all Values for that number, 1865// and then scan the list to find one whose block dominates the block in 1866// question. This is fast because dominator tree queries consist of only 1867// a few comparisons of DFS numbers. 1868Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { 1869 LeaderTableEntry Vals = LeaderTable[num]; 1870 if (!Vals.Val) return 0; 1871 1872 Value *Val = 0; 1873 if (DT->dominates(Vals.BB, BB)) { 1874 Val = Vals.Val; 1875 if (isa<Constant>(Val)) return Val; 1876 } 1877 1878 LeaderTableEntry* Next = Vals.Next; 1879 while (Next) { 1880 if (DT->dominates(Next->BB, BB)) { 1881 if (isa<Constant>(Next->Val)) return Next->Val; 1882 if (!Val) Val = Next->Val; 1883 } 1884 1885 Next = Next->Next; 1886 } 1887 1888 return Val; 1889} 1890 1891/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the 1892/// use is dominated by the given basic block. Returns the number of uses that 1893/// were replaced. 1894unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, 1895 BasicBlock *Root) { 1896 unsigned Count = 0; 1897 for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); 1898 UI != UE; ) { 1899 Instruction *User = cast<Instruction>(*UI); 1900 unsigned OpNum = UI.getOperandNo(); 1901 ++UI; 1902 1903 if (DT->dominates(Root, User->getParent())) { 1904 User->setOperand(OpNum, To); 1905 ++Count; 1906 } 1907 } 1908 return Count; 1909} 1910 1911/// propagateEquality - The given values are known to be equal in every block 1912/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with 1913/// 'RHS' everywhere in the scope. Returns whether a change was made. 1914bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { 1915 if (LHS == RHS) return false; 1916 assert(LHS->getType() == RHS->getType() && "Equal but types differ!"); 1917 1918 // Don't try to propagate equalities between constants. 1919 if (isa<Constant>(LHS) && isa<Constant>(RHS)) 1920 return false; 1921 1922 // Make sure that any constants are on the right-hand side. In general the 1923 // best results are obtained by placing the longest lived value on the RHS. 1924 if (isa<Constant>(LHS)) 1925 std::swap(LHS, RHS); 1926 1927 // If neither term is constant then bail out. This is not for correctness, 1928 // it's just that the non-constant case is much less useful: it occurs just 1929 // as often as the constant case but handling it hardly ever results in an 1930 // improvement. 1931 if (!isa<Constant>(RHS)) 1932 return false; 1933 1934 // If value numbering later deduces that an instruction in the scope is equal 1935 // to 'LHS' then ensure it will be turned into 'RHS'. 1936 addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root); 1937 1938 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. 1939 unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); 1940 bool Changed = NumReplacements > 0; 1941 NumGVNEqProp += NumReplacements; 1942 1943 // Now try to deduce additional equalities from this one. For example, if the 1944 // known equality was "(A != B)" == "false" then it follows that A and B are 1945 // equal in the scope. Only boolean equalities with an explicit true or false 1946 // RHS are currently supported. 1947 if (!RHS->getType()->isIntegerTy(1)) 1948 // Not a boolean equality - bail out. 1949 return Changed; 1950 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 1951 if (!CI) 1952 // RHS neither 'true' nor 'false' - bail out. 1953 return Changed; 1954 // Whether RHS equals 'true'. Otherwise it equals 'false'. 1955 bool isKnownTrue = CI->isAllOnesValue(); 1956 bool isKnownFalse = !isKnownTrue; 1957 1958 // If "A && B" is known true then both A and B are known true. If "A || B" 1959 // is known false then both A and B are known false. 1960 Value *A, *B; 1961 if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || 1962 (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { 1963 Changed |= propagateEquality(A, RHS, Root); 1964 Changed |= propagateEquality(B, RHS, Root); 1965 return Changed; 1966 } 1967 1968 // If we are propagating an equality like "(A == B)" == "true" then also 1969 // propagate the equality A == B. 1970 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { 1971 // Only equality comparisons are supported. 1972 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || 1973 (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) { 1974 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 1975 Changed |= propagateEquality(Op0, Op1, Root); 1976 } 1977 return Changed; 1978 } 1979 1980 return Changed; 1981} 1982 1983/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return 1984/// true if every path from the entry block to 'Dst' passes via this edge. In 1985/// particular 'Dst' must not be reachable via another edge from 'Src'. 1986static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, 1987 DominatorTree *DT) { 1988 // First off, there must not be more than one edge from Src to Dst, there 1989 // should be exactly one. So keep track of the number of times Src occurs 1990 // as a predecessor of Dst and fail if it's more than once. Secondly, any 1991 // other predecessors of Dst should be dominated by Dst (see logic below). 1992 bool SawEdgeFromSrc = false; 1993 for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) { 1994 BasicBlock *Pred = *PI; 1995 if (Pred == Src) { 1996 // An edge from Src to Dst. 1997 if (SawEdgeFromSrc) 1998 // There are multiple edges from Src to Dst - fail. 1999 return false; 2000 SawEdgeFromSrc = true; 2001 continue; 2002 } 2003 // If the predecessor is not dominated by Dst, then it must be possible to 2004 // reach it either without passing through Src (and thus not via the edge) 2005 // or by passing through Src but taking a different edge out of Src. Either 2006 // way it is possible to reach Dst without passing via the edge, so fail. 2007 if (!DT->dominates(Dst, *PI)) 2008 return false; 2009 } 2010 assert(SawEdgeFromSrc && "No edge between these basic blocks!"); 2011 2012 // Every path from the entry block to Dst must at some point pass to Dst from 2013 // a predecessor that is not dominated by Dst. This predecessor can only be 2014 // Src, since all others are dominated by Dst. As there is only one edge from 2015 // Src to Dst, the path passes by this edge. 2016 return true; 2017} 2018 2019/// processInstruction - When calculating availability, handle an instruction 2020/// by inserting it into the appropriate sets 2021bool GVN::processInstruction(Instruction *I) { 2022 // Ignore dbg info intrinsics. 2023 if (isa<DbgInfoIntrinsic>(I)) 2024 return false; 2025 2026 // If the instruction can be easily simplified then do so now in preference 2027 // to value numbering it. Value numbering often exposes redundancies, for 2028 // example if it determines that %y is equal to %x then the instruction 2029 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 2030 if (Value *V = SimplifyInstruction(I, TD, DT)) { 2031 I->replaceAllUsesWith(V); 2032 if (MD && V->getType()->isPointerTy()) 2033 MD->invalidateCachedPointerInfo(V); 2034 markInstructionForDeletion(I); 2035 ++NumGVNSimpl; 2036 return true; 2037 } 2038 2039 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 2040 if (processLoad(LI)) 2041 return true; 2042 2043 unsigned Num = VN.lookup_or_add(LI); 2044 addToLeaderTable(Num, LI, LI->getParent()); 2045 return false; 2046 } 2047 2048 // For conditional branches, we can perform simple conditional propagation on 2049 // the condition value itself. 2050 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 2051 if (!BI->isConditional() || isa<Constant>(BI->getCondition())) 2052 return false; 2053 2054 Value *BranchCond = BI->getCondition(); 2055 2056 BasicBlock *TrueSucc = BI->getSuccessor(0); 2057 BasicBlock *FalseSucc = BI->getSuccessor(1); 2058 BasicBlock *Parent = BI->getParent(); 2059 bool Changed = false; 2060 2061 if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) 2062 Changed |= propagateEquality(BranchCond, 2063 ConstantInt::getTrue(TrueSucc->getContext()), 2064 TrueSucc); 2065 2066 if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) 2067 Changed |= propagateEquality(BranchCond, 2068 ConstantInt::getFalse(FalseSucc->getContext()), 2069 FalseSucc); 2070 2071 return Changed; 2072 } 2073 2074 // For switches, propagate the case values into the case destinations. 2075 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { 2076 Value *SwitchCond = SI->getCondition(); 2077 BasicBlock *Parent = SI->getParent(); 2078 bool Changed = false; 2079 for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { 2080 BasicBlock *Dst = SI->getSuccessor(i); 2081 if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) 2082 Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst); 2083 } 2084 return Changed; 2085 } 2086 2087 // Instructions with void type don't return a value, so there's 2088 // no point in trying to find redudancies in them. 2089 if (I->getType()->isVoidTy()) return false; 2090 2091 uint32_t NextNum = VN.getNextUnusedValueNumber(); 2092 unsigned Num = VN.lookup_or_add(I); 2093 2094 // Allocations are always uniquely numbered, so we can save time and memory 2095 // by fast failing them. 2096 if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { 2097 addToLeaderTable(Num, I, I->getParent()); 2098 return false; 2099 } 2100 2101 // If the number we were assigned was a brand new VN, then we don't 2102 // need to do a lookup to see if the number already exists 2103 // somewhere in the domtree: it can't! 2104 if (Num == NextNum) { 2105 addToLeaderTable(Num, I, I->getParent()); 2106 return false; 2107 } 2108 2109 // Perform fast-path value-number based elimination of values inherited from 2110 // dominators. 2111 Value *repl = findLeader(I->getParent(), Num); 2112 if (repl == 0) { 2113 // Failure, just remember this instance for future use. 2114 addToLeaderTable(Num, I, I->getParent()); 2115 return false; 2116 } 2117 2118 // Remove it! 2119 I->replaceAllUsesWith(repl); 2120 if (MD && repl->getType()->isPointerTy()) 2121 MD->invalidateCachedPointerInfo(repl); 2122 markInstructionForDeletion(I); 2123 return true; 2124} 2125 2126/// runOnFunction - This is the main transformation entry point for a function. 2127bool GVN::runOnFunction(Function& F) { 2128 if (!NoLoads) 2129 MD = &getAnalysis<MemoryDependenceAnalysis>(); 2130 DT = &getAnalysis<DominatorTree>(); 2131 TD = getAnalysisIfAvailable<TargetData>(); 2132 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 2133 VN.setMemDep(MD); 2134 VN.setDomTree(DT); 2135 2136 bool Changed = false; 2137 bool ShouldContinue = true; 2138 2139 // Merge unconditional branches, allowing PRE to catch more 2140 // optimization opportunities. 2141 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 2142 BasicBlock *BB = FI++; 2143 2144 bool removedBlock = MergeBlockIntoPredecessor(BB, this); 2145 if (removedBlock) ++NumGVNBlocks; 2146 2147 Changed |= removedBlock; 2148 } 2149 2150 unsigned Iteration = 0; 2151 while (ShouldContinue) { 2152 DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 2153 ShouldContinue = iterateOnFunction(F); 2154 if (splitCriticalEdges()) 2155 ShouldContinue = true; 2156 Changed |= ShouldContinue; 2157 ++Iteration; 2158 } 2159 2160 if (EnablePRE) { 2161 bool PREChanged = true; 2162 while (PREChanged) { 2163 PREChanged = performPRE(F); 2164 Changed |= PREChanged; 2165 } 2166 } 2167 // FIXME: Should perform GVN again after PRE does something. PRE can move 2168 // computations into blocks where they become fully redundant. Note that 2169 // we can't do this until PRE's critical edge splitting updates memdep. 2170 // Actually, when this happens, we should just fully integrate PRE into GVN. 2171 2172 cleanupGlobalSets(); 2173 2174 return Changed; 2175} 2176 2177 2178bool GVN::processBlock(BasicBlock *BB) { 2179 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 2180 // (and incrementing BI before processing an instruction). 2181 assert(InstrsToErase.empty() && 2182 "We expect InstrsToErase to be empty across iterations"); 2183 bool ChangedFunction = false; 2184 2185 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 2186 BI != BE;) { 2187 ChangedFunction |= processInstruction(BI); 2188 if (InstrsToErase.empty()) { 2189 ++BI; 2190 continue; 2191 } 2192 2193 // If we need some instructions deleted, do it now. 2194 NumGVNInstr += InstrsToErase.size(); 2195 2196 // Avoid iterator invalidation. 2197 bool AtStart = BI == BB->begin(); 2198 if (!AtStart) 2199 --BI; 2200 2201 for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), 2202 E = InstrsToErase.end(); I != E; ++I) { 2203 DEBUG(dbgs() << "GVN removed: " << **I << '\n'); 2204 if (MD) MD->removeInstruction(*I); 2205 (*I)->eraseFromParent(); 2206 DEBUG(verifyRemoved(*I)); 2207 } 2208 InstrsToErase.clear(); 2209 2210 if (AtStart) 2211 BI = BB->begin(); 2212 else 2213 ++BI; 2214 } 2215 2216 return ChangedFunction; 2217} 2218 2219/// performPRE - Perform a purely local form of PRE that looks for diamond 2220/// control flow patterns and attempts to perform simple PRE at the join point. 2221bool GVN::performPRE(Function &F) { 2222 bool Changed = false; 2223 DenseMap<BasicBlock*, Value*> predMap; 2224 for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), 2225 DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { 2226 BasicBlock *CurrentBlock = *DI; 2227 2228 // Nothing to PRE in the entry block. 2229 if (CurrentBlock == &F.getEntryBlock()) continue; 2230 2231 // Don't perform PRE on a landing pad. 2232 if (CurrentBlock->isLandingPad()) continue; 2233 2234 for (BasicBlock::iterator BI = CurrentBlock->begin(), 2235 BE = CurrentBlock->end(); BI != BE; ) { 2236 Instruction *CurInst = BI++; 2237 2238 if (isa<AllocaInst>(CurInst) || 2239 isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || 2240 CurInst->getType()->isVoidTy() || 2241 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 2242 isa<DbgInfoIntrinsic>(CurInst)) 2243 continue; 2244 2245 // We don't currently value number ANY inline asm calls. 2246 if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) 2247 if (CallI->isInlineAsm()) 2248 continue; 2249 2250 uint32_t ValNo = VN.lookup(CurInst); 2251 2252 // Look for the predecessors for PRE opportunities. We're 2253 // only trying to solve the basic diamond case, where 2254 // a value is computed in the successor and one predecessor, 2255 // but not the other. We also explicitly disallow cases 2256 // where the successor is its own predecessor, because they're 2257 // more complicated to get right. 2258 unsigned NumWith = 0; 2259 unsigned NumWithout = 0; 2260 BasicBlock *PREPred = 0; 2261 predMap.clear(); 2262 2263 for (pred_iterator PI = pred_begin(CurrentBlock), 2264 PE = pred_end(CurrentBlock); PI != PE; ++PI) { 2265 BasicBlock *P = *PI; 2266 // We're not interested in PRE where the block is its 2267 // own predecessor, or in blocks with predecessors 2268 // that are not reachable. 2269 if (P == CurrentBlock) { 2270 NumWithout = 2; 2271 break; 2272 } else if (!DT->dominates(&F.getEntryBlock(), P)) { 2273 NumWithout = 2; 2274 break; 2275 } 2276 2277 Value* predV = findLeader(P, ValNo); 2278 if (predV == 0) { 2279 PREPred = P; 2280 ++NumWithout; 2281 } else if (predV == CurInst) { 2282 NumWithout = 2; 2283 } else { 2284 predMap[P] = predV; 2285 ++NumWith; 2286 } 2287 } 2288 2289 // Don't do PRE when it might increase code size, i.e. when 2290 // we would need to insert instructions in more than one pred. 2291 if (NumWithout != 1 || NumWith == 0) 2292 continue; 2293 2294 // Don't do PRE across indirect branch. 2295 if (isa<IndirectBrInst>(PREPred->getTerminator())) 2296 continue; 2297 2298 // We can't do PRE safely on a critical edge, so instead we schedule 2299 // the edge to be split and perform the PRE the next time we iterate 2300 // on the function. 2301 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); 2302 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { 2303 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); 2304 continue; 2305 } 2306 2307 // Instantiate the expression in the predecessor that lacked it. 2308 // Because we are going top-down through the block, all value numbers 2309 // will be available in the predecessor by the time we need them. Any 2310 // that weren't originally present will have been instantiated earlier 2311 // in this loop. 2312 Instruction *PREInstr = CurInst->clone(); 2313 bool success = true; 2314 for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { 2315 Value *Op = PREInstr->getOperand(i); 2316 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 2317 continue; 2318 2319 if (Value *V = findLeader(PREPred, VN.lookup(Op))) { 2320 PREInstr->setOperand(i, V); 2321 } else { 2322 success = false; 2323 break; 2324 } 2325 } 2326 2327 // Fail out if we encounter an operand that is not available in 2328 // the PRE predecessor. This is typically because of loads which 2329 // are not value numbered precisely. 2330 if (!success) { 2331 delete PREInstr; 2332 DEBUG(verifyRemoved(PREInstr)); 2333 continue; 2334 } 2335 2336 PREInstr->insertBefore(PREPred->getTerminator()); 2337 PREInstr->setName(CurInst->getName() + ".pre"); 2338 PREInstr->setDebugLoc(CurInst->getDebugLoc()); 2339 predMap[PREPred] = PREInstr; 2340 VN.add(PREInstr, ValNo); 2341 ++NumGVNPRE; 2342 2343 // Update the availability map to include the new instruction. 2344 addToLeaderTable(ValNo, PREInstr, PREPred); 2345 2346 // Create a PHI to make the value available in this block. 2347 pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); 2348 PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), 2349 CurInst->getName() + ".pre-phi", 2350 CurrentBlock->begin()); 2351 for (pred_iterator PI = PB; PI != PE; ++PI) { 2352 BasicBlock *P = *PI; 2353 Phi->addIncoming(predMap[P], P); 2354 } 2355 2356 VN.add(Phi, ValNo); 2357 addToLeaderTable(ValNo, Phi, CurrentBlock); 2358 Phi->setDebugLoc(CurInst->getDebugLoc()); 2359 CurInst->replaceAllUsesWith(Phi); 2360 if (Phi->getType()->isPointerTy()) { 2361 // Because we have added a PHI-use of the pointer value, it has now 2362 // "escaped" from alias analysis' perspective. We need to inform 2363 // AA of this. 2364 for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; 2365 ++ii) { 2366 unsigned jj = PHINode::getOperandNumForIncomingValue(ii); 2367 VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); 2368 } 2369 2370 if (MD) 2371 MD->invalidateCachedPointerInfo(Phi); 2372 } 2373 VN.erase(CurInst); 2374 removeFromLeaderTable(ValNo, CurInst, CurrentBlock); 2375 2376 DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); 2377 if (MD) MD->removeInstruction(CurInst); 2378 CurInst->eraseFromParent(); 2379 DEBUG(verifyRemoved(CurInst)); 2380 Changed = true; 2381 } 2382 } 2383 2384 if (splitCriticalEdges()) 2385 Changed = true; 2386 2387 return Changed; 2388} 2389 2390/// splitCriticalEdges - Split critical edges found during the previous 2391/// iteration that may enable further optimization. 2392bool GVN::splitCriticalEdges() { 2393 if (toSplit.empty()) 2394 return false; 2395 do { 2396 std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); 2397 SplitCriticalEdge(Edge.first, Edge.second, this); 2398 } while (!toSplit.empty()); 2399 if (MD) MD->invalidateCachedPredecessors(); 2400 return true; 2401} 2402 2403/// iterateOnFunction - Executes one iteration of GVN 2404bool GVN::iterateOnFunction(Function &F) { 2405 cleanupGlobalSets(); 2406 2407 // Top-down walk of the dominator tree 2408 bool Changed = false; 2409#if 0 2410 // Needed for value numbering with phi construction to work. 2411 ReversePostOrderTraversal<Function*> RPOT(&F); 2412 for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), 2413 RE = RPOT.end(); RI != RE; ++RI) 2414 Changed |= processBlock(*RI); 2415#else 2416 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 2417 DE = df_end(DT->getRootNode()); DI != DE; ++DI) 2418 Changed |= processBlock(DI->getBlock()); 2419#endif 2420 2421 return Changed; 2422} 2423 2424void GVN::cleanupGlobalSets() { 2425 VN.clear(); 2426 LeaderTable.clear(); 2427 TableAllocator.Reset(); 2428} 2429 2430/// verifyRemoved - Verify that the specified instruction does not occur in our 2431/// internal data structures. 2432void GVN::verifyRemoved(const Instruction *Inst) const { 2433 VN.verifyRemoved(Inst); 2434 2435 // Walk through the value number scope to make sure the instruction isn't 2436 // ferreted away in it. 2437 for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator 2438 I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { 2439 const LeaderTableEntry *Node = &I->second; 2440 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2441 2442 while (Node->Next) { 2443 Node = Node->Next; 2444 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2445 } 2446 } 2447} 2448