GVN.cpp revision 2802c7536f1f52a4028f8ba7c5406c2120f899b5
//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include <list>
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
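///
/// A minimal illustration (hypothetical IR, not from this file): given
///   %a = add i32 %x, %y
///   %b = add i32 %x, %y
/// both instructions build identical Expressions (same opcode, type, and
/// operand value numbers), so lookup_or_add assigns %a and %b the same
/// number, making the second add a candidate for elimination.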
namespace {
  struct Expression {
    uint32_t opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;

    Expression() { }
    Expression(uint32_t o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == ~0U || opcode == ~1U)
        return true;
      else if (type != other.type)
        return false;
      else if (varargs != other.varargs)
        return false;
      return true;
    }
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression create_expression(Instruction* I);
      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    // Mix the type pointer into the hash rather than overwriting it, so
    // the opcode still contributes to the hash value.
    hash ^= (unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9);

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  if (CmpInst *C = dyn_cast<CmpInst>(I))
    e.opcode = (C->getOpcode() << 8) | C->getPredicate();
  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber!  Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies.  If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or :
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::ExtractValue:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData* TD;

    ValueTable VN;

    /// NumberTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number.  Use lookupNumber to query it.
    struct NumberTableEntry {
      Value *Val;
      BasicBlock *BB;
      NumberTableEntry *Next;
    };
    DenseMap<uint32_t, NumberTableEntry> NumberTable;
    BumpPtrAllocator TableAllocator;

    /// insert_table - Push a new Value to the NumberTable onto the list for
    /// its value number.
    void insert_table(uint32_t N, Value *V, BasicBlock *BB) {
      NumberTableEntry& Curr = NumberTable[N];
      if (!Curr.Val) {
        Curr.Val = V;
        Curr.BB = BB;
        return;
      }

      NumberTableEntry* Node = TableAllocator.Allocate<NumberTableEntry>();
      Node->Val = V;
      Node->BB = BB;
      Node->Next = Curr.Next;
      Curr.Next = Node;
    }

    /// erase_table - Scan the list of values corresponding to a given value
    /// number, and remove the given value if encountered.
    void erase_table(uint32_t N, Value *V, BasicBlock *BB) {
      NumberTableEntry* Prev = 0;
      NumberTableEntry* Curr = &NumberTable[N];

      while (Curr->Val != V || Curr->BB != BB) {
        Prev = Curr;
        Curr = Curr->Next;
      }

      if (Prev) {
        Prev->Next = Curr->Next;
      } else {
        if (!Curr->Next) {
          Curr->Val = 0;
          Curr->BB = 0;
        } else {
          NumberTableEntry* Next = Curr->Next;
          Curr->Val = Next->Val;
          Curr->BB = Next->BB;
        }
      }
    }

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better.
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully available in FullyAvailableBlocks.
/// This map holds one of four states for each block:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
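  // (For example, the entry block or an unreachable block: with no
  // predecessors there is no path that could carry the value in, so the
  // optimistic assumption must be undone.)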
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}


/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value.  LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.
  // AA must have gotten confused.
  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}

static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
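  // Illustrative sketch (hypothetical values, not from this file): if @G is
  // a constant global and the clobber is memcpy(%buf, @G, 16), a load of
  // %buf at byte offset Off can be folded to a load of @G at Off, which
  // ConstantFoldLoadFromConstPtr can often evaluate at compile time.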
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.
  // Convert to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
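    // Worked example (assumed sizes, not from this file): for a 4-byte load
    // of a memset byte 0xAB, the doubling loop below builds
    //   0x000000AB -> 0x0000ABAB -> 0xABABABAB
    // in O(log n) shift/or steps; the byte-at-a-time path handles load sizes
    // that are not a power of two.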
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}

namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type.  This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << " " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  // Now that we've copied information to the new PHIs, scan through
  // them again and inform alias analysis that we've added potentially
  // escaping uses to any values that are operands to these PHIs.
  for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
    PHINode *P = NewPHIs[i];
    for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
      AA->addEscapingUse(P->getOperandUse(2*ii));
  }

  return V;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
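  // Sketch of the common shape this handles (hypothetical IR): with
  //   bb1: store i32 1, i32* %P        bb2: store i32 2, i32* %P
  // both branching to bb3 containing '%v = load i32* %P', each store is a
  // non-local def; the PHI construction below replaces %v with
  //   phi i32 [ 1, %bb1 ], [ 2, %bb2 ].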
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have a value available (ValuesPerBlock), and of dependencies
  // that produce an unknown value for the load (such as a call that could
  // potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs.  Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                      DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                        S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the
      // load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor.  Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated.  Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
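    // Sketch (hypothetical CFG): if TmpBB branches to both LoadBB and some
    // block %other, hoisting the load into TmpBB would also execute it on
    // the TmpBB -> %other path, where the original program never loaded.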
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }
  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary.
    // The returned pointer (if non-null) is guaranteed to dominate
    // UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
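    // E.g. (hypothetical IR): if the address in LoadBB is
    //   %p = phi i8* [ %a, %pred1 ], [ %b, %pred2 ]
    // then the translated address in %pred1 is just %a; PHITransAddr can
    // also rebuild simple GEPs and casts of such PHIs in the predecessor.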
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can
    // be 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case.  Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's TBAA tag to the new load.
    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are
    // obviously a common base + constant offset, and if the previous store
    // (or memset) completely covers this load.  This sort of thing can happen
    // in bitfield access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
            << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // fast print dep, using operator<< on instruction would be too slow
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getValueOperand();

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
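    // E.g. (hypothetical IR, assuming target data is available):
    //   store i64 %v, i64* %p
    //   %x = load i32* %q        ; %q must-aliases %p
    // CoerceAvailableValueToLoadType below can reuse %v by converting it to
    // an integer, shifting on big-endian targets, and truncating to i32.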
    if (StoredVal->getType() != L->getType()) {
      if (TD) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n"
                     << *AvailableVal << "\n" << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading from a fresh allocation with no
  // intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime begin,
  // then the loaded value is undefined.
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}

// lookupNumber - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that
// number, and then scan the list to find one whose block dominates the block
// in question.  This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  NumberTableEntry Vals = NumberTable[num];
  if (!Vals.Val) return 0;

  Value *Val = 0;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  NumberTableEntry* Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }

    Next = Next->Next;
  }

  return Val;
}


/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(I);
    toErase.push_back(I);
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      insert_table(Num, LI, LI->getParent());
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  // For conditional branches, we can perform simple conditional propagation on
  // the condition value itself.
  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    insert_table(Num, I, I->getParent());

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      insert_table(CondVN,
                   ConstantInt::getTrue(TrueSucc->getContext()),
                   TrueSucc);
    if (FalseSucc->getSinglePredecessor())
      insert_table(CondVN,
                   ConstantInt::getFalse(FalseSucc->getContext()),
                   FalseSucc);

    return false;
  }

  // Allocations are always uniquely numbered (and terminators and PHIs are
  // never eliminated here), so we can save time and memory by fast failing
  // them.
  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
    insert_table(Num, I, I->getParent());
    return false;
  }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  if (Num == NextNum) {
    insert_table(Num, I, I->getParent());
    return false;
  }

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  Value *repl = lookupNumber(I->getParent(), Num);
  if (repl == 0) {
    // Failure, just remember this instance for future use.
    insert_table(Num, I, I->getParent());
    return false;
  }

  // Remove it!
  VN.erase(I);
  I->replaceAllUsesWith(repl);
  if (MD && repl->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(repl);
  toErase.push_back(I);
  return true;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function &F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
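  // For example (illustrative; the block names are hypothetical): if %A ends
  // in "br label %B" and %B has no other predecessors, %B is folded into %A,
  // so the passes below see one straight-line block instead of two.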
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}

bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);

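      // PRE targets the diamond sketched below (an illustrative sketch; the
      // block letters are hypothetical).  CurInst in D is available in
      // predecessor C but not in predecessor B, so a copy is inserted at the
      // end of B and CurInst is replaced by a PHI in D:
      //
      //         [A]
      //        /   \
      //      [B]   [C]   <-- expression already available in C
      //        \   /
      //         [D]      <-- CurInst: becomes phi(copy in B, value in C)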
      // Look for the predecessors for PRE opportunities.  We're only trying
      // to solve the basic diamond case, where a value is computed in the
      // successor and one predecessor, but not the other.  We also explicitly
      // disallow cases where the successor is its own predecessor, because
      // they're more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!DT->dominates(&F.getEntryBlock(), P)) {
          NumWithout = 2;
          break;
        }

        Value *predV = lookupNumber(P, ValNo);
        if (predV == 0) {
          PREPred = P;
          ++NumWithout;
        } else if (predV == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV;
          ++NumWith;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across an indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      insert_table(ValNo, PREInstr, PREPred);

      // Create a PHI to make the value available in this block.
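      // For instance (an illustrative sketch; %v, %B and %C are hypothetical
      // names), if the value was already available in predecessor %C and the
      // copy %v.pre was just inserted into predecessor %B, the join block
      // gets:
      //   %v.pre-phi = phi i32 [ %v.pre, %B ], [ %v, %C ]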
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      insert_table(ValNo, Phi, CurrentBlock);

      CurInst->replaceAllUsesWith(Phi);
      if (Phi->getType()->isPointerTy()) {
        // Because we have added a PHI-use of the pointer value, it has now
        // "escaped" from alias analysis' perspective.  We need to inform
        // AA of this.
        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));

        if (MD)
          MD->invalidateCachedPointerInfo(Phi);
      }
      VN.erase(CurInst);
      erase_table(ValNo, CurInst, CurrentBlock);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  NumberTable.clear();
  TableAllocator.Reset();
}

/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, NumberTableEntry>::const_iterator
       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
    const NumberTableEntry *Node = &I->second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}