GVN.cpp revision 465942d0085a2d20bcd2028bd1e54bde1ac73385
//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
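/// For example, given the IR below, %a and %b produce identical Expressions
/// (same opcode, type, and operand value numbers), so lookup_or_add assigns
/// them the same value number:
///   %a = add i32 %x, %y
///   %b = add i32 %x, %y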
namespace {
  struct Expression {
    uint32_t opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;

    Expression() { }
    Expression(uint32_t o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == ~0U || opcode == ~1U)
        return true;
      else if (type != other.type)
        return false;
      else if (varargs != other.varargs)
        return false;
      return true;
    }
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression create_expression(Instruction* I);
      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    // Mix the type into the hash, folding in the opcode seeded above so that
    // expressions differing only in opcode hash differently.
    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9)) +
           hash * 37;

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  if (CmpInst *C = dyn_cast<CmpInst>(I))
    e.opcode = (C->getOpcode() << 8) | C->getPredicate();
  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}
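// Note on the CmpInst case above: the comparison opcode is shifted left by 8
// bits and combined with the predicate, so that e.g. "icmp eq" and "icmp slt"
// on the same operands produce distinct Expression opcodes and therefore
// distinct value numbers.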
//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber!  Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies.  If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}
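// Illustrative example: two calls "%r1 = call i32 @f(i32 %x)" and
// "%r2 = call i32 @f(i32 %x)" to a readonly @f receive the same value number
// only if memdep proves the second call depends directly on the first, i.e.
// no intervening write clobbers the memory @f reads.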
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or :
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::ExtractValue:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value.  Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData* TD;

    ValueTable VN;

    /// NumberTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number.  Use findLeader to query it.
    struct LeaderTableEntry {
      Value *Val;
      BasicBlock *BB;
      LeaderTableEntry *Next;
    };
    DenseMap<uint32_t, LeaderTableEntry> NumberTable;
    BumpPtrAllocator TableAllocator;

    /// addToLeaderTable - Push a new Value to the NumberTable onto the list
    /// for its value number.
    void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
      LeaderTableEntry& Curr = NumberTable[N];
      if (!Curr.Val) {
        Curr.Val = V;
        Curr.BB = BB;
        return;
      }

      LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
      Node->Val = V;
      Node->BB = BB;
      Node->Next = Curr.Next;
      Curr.Next = Node;
    }

    /// removeFromLeaderTable - Scan the list of values corresponding to a
    /// given value number, and remove the given value if encountered.
    void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
      LeaderTableEntry* Prev = 0;
      LeaderTableEntry* Curr = &NumberTable[N];

      while (Curr->Val != V || Curr->BB != BB) {
        Prev = Curr;
        Curr = Curr->Next;
      }

      if (Prev) {
        Prev->Next = Curr->Next;
      } else {
        if (!Curr->Next) {
          Curr->Val = 0;
          Curr->BB = 0;
        } else {
          LeaderTableEntry* Next = Curr->Next;
          Curr->Val = Next->Val;
          Curr->BB = Next->BB;
          Curr->Next = Next->Next;
        }
      }
    }
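    // Illustrative shape of the table: if %a (in BB1) and %b (in BB2) share
    // value number 7, NumberTable[7] holds {Val=%a, BB=BB1} with a chained
    // entry {Val=%b, BB=BB2}; findLeader(BB, 7) walks this chain looking for
    // an entry whose block dominates BB.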
    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function& F);
    Value *findLeader(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks.
/// This map is actually a four-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
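/// For example: if the query walks into a loop, the header is first inserted
/// with state 2; recursing around the backedge finds the header already
/// present, promotes it to state 3, and optimistically answers "available".
/// Should some other predecessor later prove unavailable, SpeculationFailure
/// below clears every block whose availability rested on that speculation.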
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this
// and used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them
  // as 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}
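// For example, an i64 store can feed an i32 load from the same address (the
// i64 is at least as wide and both types can be bitcast to integers), but an
// i32 store cannot feed an i64 load, and aggregates are rejected outright.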
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory,
/// and then a load from a must-aliased pointer of a different type, try to
/// coerce the stored value.  LoadedTy is the type of the load we want to
/// replace and InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits
/// used by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;
  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to
  // be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}
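// For example, "store i64 %v, i64* %p" followed by a load of i32 from
// (%p + 4 bytes) shares the base %p; the store covers bytes [0,8) and the
// load bytes [4,8), so the analysis returns offset 4 into the stored value.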
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.
  // Convert to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
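// Continuing the earlier example on a little-endian target: extracting the
// i32 at byte offset 4 of a stored i64 %v emits "%t = lshr i64 %v, 32" and
// "%r = trunc i64 %t to i32", and %r then stands in for the load.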
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}
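// For example, forwarding "memset(p, %b, n)" to an i32 load zero-extends %b
// to i32 and then doubles the set bytes twice: Val |= (Val << 8) turns one
// byte into two, and Val |= (Val << 16) turns two into four, replicating the
// memset byte across all four positions.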
namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type.  This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << "  " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  // Now that we've copied information to the new PHIs, scan through
  // them again and inform alias analysis that we've added potentially
  // escaping uses to any values that are operands to these PHIs.
  for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
    PHINode *P = NewPHIs[i];
    for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
      AA->addEscapingUse(P->getOperandUse(2*ii));
  }

  return V;
}
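// For example, if the load's block joins two predecessors and the value is
// available as %v1 out of one and %v2 out of the other, SSAUpdater places
// "%phi = phi [%v1, ...], [%v2, ...]" at the join, and %phi replaces the load.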
static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs.  Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                       DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }
    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                         S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the
      // load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).
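  // For example, in a diamond where the value is stored along one predecessor
  // path but not the other, PRE inserts a copy of the load at the end of the
  // unavailable predecessor and then builds a PHI at the join, making the
  // original load fully redundant.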
  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor.  Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated.  Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions.  We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }
  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable
  // predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary.
    // The returned pointer (if non-null) is guaranteed to dominate
    // UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
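    // For example, if the pointer in LoadBB is "%p = phi [%a, Pred1],
    // [%b, Pred2]", PHI translation of %p into Pred1 yields %a, so the reload
    // inserted there uses %a directly rather than %p.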
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can be
    // 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case.  Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's TBAA tag to the new load.
    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are
    // obviously a common base + constant offset, and if the previous store
    // (or memset) completely covers this load.  This sort of thing can happen
    // in bitfield access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
            << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // fast print dep, using operator<< on instruction would be too slow
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getValueOperand();

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
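    // For example, "store i32 %v, i32* %p" followed by "%c = load i8* %q"
    // where %p and %q must-alias: the i8 is carved out of %v with the
    // shift/truncate sequence built by CoerceAvailableValueToLoadType.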
    if (StoredVal->getType() != L->getType()) {
      if (TD) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                     << "\n" << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading for a fresh allocation with no
  // intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime begin, then the loaded value
  // is undefined.
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}

// findLeader - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that
// number, and then scan the list to find one whose block dominates the block
// in question.  This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
  LeaderTableEntry Vals = NumberTable[num];
  if (!Vals.Val) return 0;

  Value *Val = 0;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  LeaderTableEntry* Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }

    Next = Next->Next;
  }

  return Val;
}
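// For example, when %a (in a block dominating BB) and %b (in an unrelated
// block) both carry value number 7, findLeader(BB, 7) returns %a.  A Constant
// leader whose block dominates BB is returned immediately, in preference to
// any instruction leader.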
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(I);
    toErase.push_back(I);
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      addToLeaderTable(Num, LI, LI->getParent());
    }

    return Changed;
  }

  // For conditional branches, we can perform simple conditional propagation
  // on the condition value itself.
  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    // In a successor reachable only when the branch is taken, the condition
    // is known to be true; in the not-taken successor, false.
    if (TrueSucc->getSinglePredecessor())
      addToLeaderTable(CondVN,
                       ConstantInt::getTrue(TrueSucc->getContext()),
                       TrueSucc);
    if (FalseSucc->getSinglePredecessor())
      addToLeaderTable(CondVN,
                       ConstantInt::getFalse(FalseSucc->getContext()),
                       FalseSucc);

    return false;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  // Allocations, terminators and PHI nodes are always uniquely numbered, so
  // we can save time and memory by fast-failing them.
  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  if (Num == NextNum) {
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  Value *repl = findLeader(I->getParent(), Num);
  if (repl == 0) {
    // Failure, just remember this instance for future use.
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // Remove it!
  VN.erase(I);
  I->replaceAllUsesWith(repl);
  if (MD && repl->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(repl);
  toErase.push_back(I);
  return true;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function &F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
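  // (Illustration: a chain A -> B -> C joined by unconditional branches,
  // where B and C have no other predecessors, collapses into one block,
  // giving the local PRE below a larger window to work with.)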
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}

bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join
/// point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      // Skip instructions PRE cannot handle: allocas, terminators, PHIs,
      // void-typed instructions, and anything that touches memory or has
      // side effects.
      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look for PRE opportunities in the predecessors.  We're only trying
      // to solve the basic diamond case, where a value is computed in the
      // successor and one predecessor, but not the other.  We also
      // explicitly disallow cases where the successor is its own
      // predecessor, because they're more complicated to get right.
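      //
      // Illustrative diamond (hypothetical IR; the names are invented):
      //
      //     BB1: %a = add i32 %x, %y      BB2: (no equivalent add)
      //          \                        /
      //           +---> CurrentBlock <---+
      //                 %b = add i32 %x, %y   ; partially redundant
      //
      // PRE inserts a copy of the add into BB2, making %b fully redundant
      // and replaceable by a PHI over the two predecessors.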
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!DT->dominates(&F.getEntryBlock(), P)) {
          NumWithout = 2;
          break;
        }

        Value *predV = findLeader(P, ValNo);
        if (predV == 0) {
          PREPred = P;
          ++NumWithout;
        } else if (predV == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV;
          ++NumWith;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across an indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.  Verify before deleting, so that
      // we never hand a freed pointer to verifyRemoved.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      addToLeaderTable(ValNo, PREInstr, PREPred);

      // Create a PHI to make the value available in this block.
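      // (On the hypothetical diamond sketched above, the end state is:
      //    BB2:          %b.pre = add i32 %x, %y
      //    CurrentBlock: %b.pre-phi = phi i32 [ %a, %BB1 ], [ %b.pre, %BB2 ]
      //  and every use of %b is rewritten to use %b.pre-phi.)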
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      addToLeaderTable(ValNo, Phi, CurrentBlock);

      CurInst->replaceAllUsesWith(Phi);
      if (Phi->getType()->isPointerTy()) {
        // Because we have added a PHI-use of the pointer value, it has now
        // "escaped" from alias analysis' perspective.  We need to inform
        // AA of this.
        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
             ++ii)
          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));

        if (MD)
          MD->invalidateCachedPointerInfo(Phi);
      }
      VN.erase(CurInst);
      removeFromLeaderTable(ValNo, CurInst, CurrentBlock);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  NumberTable.clear();
  TableAllocator.Reset();
}

/// verifyRemoved - Verify that the specified instruction does not occur in
/// our internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
    const LeaderTableEntry *Node = &I->second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}