GVN.cpp revision 680ac4ff4e9434a694d3f72c44f3f2d063a7cb15
//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include <list>
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    uint32_t opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;

    Expression() { }
    Expression(uint32_t o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == ~0U || opcode == ~1U)
        return true;
      else if (type != other.type)
        return false;
      else if (varargs != other.varargs)
        return false;
      return true;
    }
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression create_expression(Instruction* I);
      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    // Mix the type pointer into the hash, keeping the opcode contribution
    // rather than overwriting it.
    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9)) +
           hash * 37;

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  if (CmpInst *C = dyn_cast<CmpInst>(I))
    e.opcode = (C->getOpcode() << 8) | C->getPredicate();
  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}
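// Illustrative example (editorial note, not from the original source): with
// the scheme above, the two adds in
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y
//
// produce identical Expressions {opcode = Add, type = i32,
// varargs = [VN(%x), VN(%y)]}, so expressionNumbering assigns them the same
// value number. Operands are not canonicalized here, so "add i32 %y, %x"
// would form a distinct Expression even though add is commutative.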
//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber! Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies. If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}
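// Editorial sketch (not in the original source): the def handling above makes
// a second call to a readonly function redundant when nothing may write
// memory in between, e.g.
//
//   %c1 = call i32 @f(i32 %x)        ; @f only reads memory
//   %c2 = call i32 @f(i32 %x)
//
// memdep reports %c1 as the local def for %c2, the per-argument value numbers
// match, and %c2 receives %c1's value number, so GVN can later replace uses
// of %c2 with %c1.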
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::ExtractValue:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}
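// Editorial note: erase only drops the Value* -> number mapping; the
// Expression -> number side is left intact so equivalent expressions keep
// their numbers. Callers in this file invoke VN.erase(I) before queuing an
// instruction for deletion (see processLoad/processInstruction below), so the
// table does not hold a dangling instruction pointer across deletions.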
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData* TD;

    ValueTable VN;

    /// NumberTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number. Use lookupNumber to query it.
    struct NumberTableEntry {
      Value *Val;
      BasicBlock *BB;
      NumberTableEntry *Next;
    };
    DenseMap<uint32_t, NumberTableEntry> NumberTable;
    BumpPtrAllocator TableAllocator;

    /// insert_table - Push a new Value to the NumberTable onto the list for
    /// its value number.
    void insert_table(uint32_t N, Value *V, BasicBlock *BB) {
      NumberTableEntry& Curr = NumberTable[N];
      if (!Curr.Val) {
        Curr.Val = V;
        Curr.BB = BB;
        return;
      }

      NumberTableEntry* Node = TableAllocator.Allocate<NumberTableEntry>();
      Node->Val = V;
      Node->BB = BB;
      Node->Next = Curr.Next;
      Curr.Next = Node;
    }

    /// erase_table - Scan the list of values corresponding to a given value
    /// number, and remove the given value if encountered.
    void erase_table(uint32_t N, Value *V, BasicBlock *BB) {
      NumberTableEntry* Prev = 0;
      NumberTableEntry* Curr = &NumberTable[N];

      while (Curr->Val != V || Curr->BB != BB) {
        Prev = Curr;
        Curr = Curr->Next;
      }

      if (Prev) {
        Prev->Next = Curr->Next;
      } else {
        if (!Curr->Next) {
          Curr->Val = 0;
          Curr->BB = 0;
        } else {
          NumberTableEntry* Next = Curr->Next;
          Curr->Val = Next->Val;
          Curr->BB = Next->BB;
          Curr->Next = Next->Next;
        }
      }
    }
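    // Editorial note: NumberTable stores the head entry of each per-number
    // list inline in the DenseMap and chains overflow entries through the
    // BumpPtrAllocator, so the common single-leader case needs no separate
    // allocation. Removing the head (erase_table above) therefore copies the
    // second entry into the head slot rather than unlinking it.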
    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better.
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either. Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect. Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks. This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}
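// Editorial sketch (not from the original source): consider a loop
//
//     avail -> header <--+
//                 |      |
//               latch ---+
//
// Querying the header optimistically inserts it as 2; recursing into the
// latch re-encounters the header, upgrades it to 3 (speculation consumed),
// and the latch's "available" answer then rests on that speculation. If the
// query for the "avail" predecessor later fails, the header's entry is 3, so
// the worklist sweep above clears the header and every transitive successor
// whose availability was derived from it.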
/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
      TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}

/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
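// Editorial sketch (not from the original source): on a little-endian target,
// coercing "store i64 %v, i64* %p" to feed "load i32* %q" (where %q
// must-aliases %p) takes the unequal-size path above and emits, in effect:
//
//   %trunc = trunc i64 %v to i32
//
// On a big-endian target the wanted bytes sit in the high bits, so an
// "lshr i64 %v, 32" is emitted before the truncate.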
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens. One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;

  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load and then merge the bits in), but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}
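// Editorial example (not in the original source): with a common base %p,
//
//   store i64 0, i64* %p                    ; covers bytes [0,8) of %p
//   %v = load i32* %q   ; %q = gep %p, +4   ; reads bytes [4,8)
//
// the load is fully contained in the store, so the analysis returns
// LoadOffset - StoreOffset = 4: the load reads the bytes starting four bytes
// into the stored value.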
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}

/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
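// Editorial example (not in the original source): continuing the sketch
// above, extracting bytes [4,8) of a stored i64 %v for an i32 load on a
// little-endian target yields
//
//   %tmp  = lshr i64 %v, 32          ; Offset*8 bits
//   %tmp1 = trunc i64 %tmp to i32
//
// while big-endian would shift by (StoreSize-LoadSize-Offset)*8 =
// (8-4-4)*8 = 0 bits and only truncate.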
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // See if we can constant fold a load from the constant with the offset
  // applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}
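// Editorial example (not from the original source): for a dependency like
//
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 %c, i64 64, ...)
//   %v = load i32* %q        ; %q within the memset range
//
// the splat loop above rebuilds %v from %c by zero-extending to i32 and
// OR-ing in shifted copies, doubling 1 -> 2 -> 4 bytes; conceptually
// %v = 0x01010101 * zext(%c).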
namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type. This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << " " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  // Now that we've copied information to the new PHIs, scan through
  // them again and inform alias analysis that we've added potentially
  // escaping uses to any values that are operands to these PHIs.
  for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
    PHINode *P = NewPHIs[i];
    for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
      AA->addEscapingUse(P->getOperandUse(2*ii));
  }

  return V;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}
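// Editorial sketch (not from the original source): for a diamond CFG
//
//        entry
//        /   \
//     left   right      ; value available in both, say as %a and %b
//        \   /
//        merge          ; %v = load ...
//
// ConstructSSAForLoadSet hands both availabilities to SSAUpdater, which
// places "%phi = phi [%a, %left], [%b, %right]" in merge, and the caller
// rewrites %v's uses to %phi.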
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs. Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                      DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                       S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the
      // load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size. As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated. Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }
  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
       E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary. The
    // returned pointer (if non-null) is guaranteed to dominate
    // UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here. We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can be
    // 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case. Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map. However, in doing so, we risk getting into
    // ordering issues. If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
       E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's TBAA tag to the new load.
    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}
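// Editorial sketch (not from the original source): load PRE in the shape
//
//   pred1: store i32 %x, i32* %p      pred2: (no value for *%p)
//        \                                /
//            merge: %v = load i32* %p
//
// leaves pred2 as the single unavailable predecessor, so a "%v.pre" load is
// inserted before pred2's terminator and ConstructSSAForLoadSet joins %x and
// %v.pre with a phi in merge, after which the original load is erased.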
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are
    // obviously a common base + constant offset, and if the previous store
    // (or memset) completely covers this load. This sort of thing can happen
    // in bitfield access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // fast print dep, using operator<< on instruction would be too slow
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getValueOperand();

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type. See if we know how to reuse the stored
    // value (depending on its type).
    if (StoredVal->getType() != L->getType()) {
      if (TD) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type. See if we know how to reuse the previously loaded value
    // (depending on its type).
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                     << "\n" << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading an
  // undef value. This can happen when loading from a fresh allocation with no
  // intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime begin, then the loaded value
  // is undefined.
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}

// lookupNumber - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that
// number, and then scan the list to find one whose block dominates the block
// in question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  NumberTableEntry Vals = NumberTable[num];
  if (!Vals.Val) return 0;

  Value *Val = 0;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  NumberTableEntry* Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }

    Next = Next->Next;
  }

  return Val;
}
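// Editorial note: lookupNumber prefers a Constant leader (returned
// immediately) over the first dominating Instruction it encounters, so the
// constant condition values recorded by the branch handling in
// processInstruction below can win over the instruction that originally
// produced the value number.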
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  NumberTableEntry Vals = NumberTable[num];
  if (!Vals.Val) return 0;

  Value *Val = 0;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  NumberTableEntry *Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      // Constants are always the preferred leaders; otherwise take the first
      // dominating value we find.
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }

    Next = Next->Next;
  }

  return Val;
}

/// processInstruction - When calculating availability, handle an instruction
/// by eliminating it if possible, or otherwise by inserting it into the
/// appropriate availability sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(I);
    toErase.push_back(I);
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      insert_table(Num, LI, LI->getParent());
    }

    return Changed;
  }

  // For conditional branches, we can perform simple conditional propagation
  // on the condition value itself.
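  // For example (names illustrative), given:
  //   br i1 %cmp, label %T, label %F
  // if %T has this block as its only predecessor, then %cmp's value number
  // is known to be true on entry to %T, and likewise false on entry to %F.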
  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      insert_table(CondVN,
                   ConstantInt::getTrue(TrueSucc->getContext()),
                   TrueSucc);
    if (FalseSucc->getSinglePredecessor())
      insert_table(CondVN,
                   ConstantInt::getFalse(FalseSucc->getContext()),
                   FalseSucc);

    return false;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  // Allocations, terminators, and PHI nodes are always uniquely numbered, so
  // we can save time and memory by fast-failing them.
  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
    insert_table(Num, I, I->getParent());
    return false;
  }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  if (Num == NextNum) {
    insert_table(Num, I, I->getParent());
    return false;
  }

  // Perform fast-path value-number based elimination of values inherited
  // from dominators.
  Value *repl = lookupNumber(I->getParent(), Num);
  if (repl == 0) {
    // Failure, just remember this instance for future use.
    insert_table(Num, I, I->getParent());
    return false;
  }

  // Remove it!
  VN.erase(I);
  I->replaceAllUsesWith(repl);
  if (MD && repl->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(repl);
  toErase.push_back(I);
  return true;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function &F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}

bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function
  // (and incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look at the predecessors for PRE opportunities.  We're only trying
      // to solve the basic diamond case, where a value is computed in the
      // join block and in one of its predecessors, but not the other.  We
      // also explicitly disallow cases where the successor is its own
      // predecessor, because they're more complicated to get right.
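      // The shape we are looking for (blocks illustrative):
      //
      //           entry
      //           /   \
      //       pred1   pred2       value available in pred1 only
      //           \   /
      //       CurrentBlock        value recomputed here
      //
      // PRE inserts the computation into pred2 and then merges the two
      // available values with a PHI in CurrentBlock.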
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its own
        // predecessor, or where a predecessor is unreachable from the
        // entry block.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!DT->dominates(&F.getEntryBlock(), P)) {
          NumWithout = 2;
          break;
        }

        Value *predV = lookupNumber(P, ValNo);
        if (predV == 0) {
          PREPred = P;
          ++NumWithout;
        } else if (predV == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV;
          ++NumWith;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across an indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      insert_table(ValNo, PREInstr, PREPred);

      // Create a PHI to make the value available in this block.
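      // For example (names illustrative), the join block ends up with:
      //   %x.pre-phi = phi i32 [ %x, %pred1 ], [ %x.pre, %pred2 ]
      // where %x.pre is the copy just inserted into the PRE predecessor.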
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      insert_table(ValNo, Phi, CurrentBlock);

      CurInst->replaceAllUsesWith(Phi);
      if (Phi->getType()->isPointerTy()) {
        // Because we have added a PHI-use of the pointer value, it has now
        // "escaped" from alias analysis' perspective.  We need to inform
        // AA of this.  PHI nodes store their operands as (value, basic
        // block) pairs, so the use of incoming value ii is operand 2*ii.
        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));

        if (MD)
          MD->invalidateCachedPointerInfo(Phi);
      }
      VN.erase(CurInst);
      erase_table(ValNo, CurInst, CurrentBlock);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  NumberTable.clear();
  TableAllocator.Reset();
}

/// verifyRemoved - Verify that the specified instruction does not occur in
/// our internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, NumberTableEntry>::const_iterator
       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
    const NumberTableEntry *Node = &I->second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}