PromoteMemoryToRegister.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file promotes memory references to be register references. It promotes 11// alloca instructions which only have loads and stores as uses. An alloca is 12// transformed by using iterated dominator frontiers to place PHI nodes, then 13// traversing the function in depth-first order to rewrite loads and stores as 14// appropriate. 15// 16// The algorithm used here is based on: 17// 18// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. 19// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of 20// Programming Languages 21// POPL '95. ACM, New York, NY, 62-73. 22// 23// It has been modified to not explicitly use the DJ graph data structure and to 24// directly compute pruned SSA using per-variable liveness information. 
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");

/// Return true if this alloca is legal for promotion to an SSA register.
///
/// An alloca is promotable if every user is a direct, non-volatile load or
/// store, or a lifetime marker (possibly reached through a no-op bitcast or
/// all-zero GEP to i8*).
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (const User *U : AI->users()) {
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Note that atomic loads can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI)
        return false; // Don't allow a store OF the AI, only INTO the AI.
      // Note that atomic stores can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (SI->isVolatile())
        return false;
    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      // Lifetime markers applied directly to the alloca are harmless.
      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
          II->getIntrinsicID() != Intrinsic::lifetime_end)
        return false;
    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // Only tolerate an i8* bitcast whose sole purpose is feeding lifetime
      // markers.
      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
        return false;
      if (!onlyUsedByLifetimeMarkers(BCI))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // Likewise for an all-zero-index GEP producing i8* for lifetime markers.
      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
        return false;
      if (!GEPI->hasAllZeroIndices())
        return false;
      if (!onlyUsedByLifetimeMarkers(GEPI))
        return false;
    } else {
      return false;
    }
  }

  return true;
}

namespace {

/// Summary of one alloca's uses, recomputed per-alloca by AnalyzeAlloca().
struct AllocaInfo {
  // Blocks containing stores to (definitions of) the alloca.
  SmallVector<BasicBlock *, 32> DefiningBlocks;
  // Blocks containing loads of (uses of) the alloca.
  SmallVector<BasicBlock *, 32> UsingBlocks;

  StoreInst *OnlyStore;       // The single store, meaningful when there is one.
  BasicBlock *OnlyBlock;      // The single using block, if OnlyUsedInOneBlock.
  bool OnlyUsedInOneBlock;

  Value *AllocaPointerVal;    // Some value stored to/loaded from the alloca,
                              // used to seed AliasSetTracker updates.
  DbgDeclareInst *DbgDeclare; // dbg.declare describing the alloca, if any.

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    OnlyStore = 0;
    OnlyBlock = 0;
    OnlyUsedInOneBlock = true;
    AllocaPointerVal = 0;
    DbgDeclare = 0;
  }

  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  /// by the rest of the pass to reason about the uses of this alloca.
  void AnalyzeAlloca(AllocaInst *AI) {
    clear();

    // As we scan the uses of the alloca instruction, keep track of stores,
    // and decide whether all of the loads and stores to the alloca are within
    // the same basic block.
    for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
      Instruction *User = cast<Instruction>(*UI++);

      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // Remember the basic blocks which define new values for the alloca
        DefiningBlocks.push_back(SI->getParent());
        AllocaPointerVal = SI->getOperand(0);
        OnlyStore = SI;
      } else {
        LoadInst *LI = cast<LoadInst>(User);
        // Otherwise it must be a load instruction, keep track of variable
        // reads.
        UsingBlocks.push_back(LI->getParent());
        AllocaPointerVal = LI;
      }

      if (OnlyUsedInOneBlock) {
        if (OnlyBlock == 0)
          OnlyBlock = User->getParent();
        else if (OnlyBlock != User->getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    DbgDeclare = FindAllocaDbgDeclare(AI);
  }
};

// Data package used by RenamePass(): one pending (block, predecessor,
// incoming-values) frame of the iterative renaming worklist.
class RenamePassData {
public:
  typedef std::vector<Value *> ValVector;

  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
      : BB(B), Pred(P), Values(V) {}
  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;

  void swap(RenamePassData &RHS) {
    std::swap(BB, RHS.BB);
    std::swap(Pred, RHS.Pred);
    Values.swap(RHS.Values);
  }
};

/// \brief This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// \brief For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:

  /// This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction. This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
         ++BBI)
      if (isInterestingInstruction(BBI))
        InstNumbers[BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  void clear() { InstNumbers.clear(); }
};

/// Driver object holding all per-run state of the mem2reg promotion.
struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;
  DominatorTree &DT;
  DIBuilder DIB;

  /// An AliasSetTracker object to update. If null, don't update it.
  AliasSetTracker *AST;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// \brief The PhiNodes we're adding, keyed by (BB number, alloca number).
  ///
  /// That map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators. We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// If we are updating an AliasSetTracker, then for each alloca that is of
  /// pointer type, we keep track of what to copyValue to the inserted PHI
  /// nodes here.
  std::vector<Value *> PointerAllocaValues;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;

  /// The set of basic blocks the renamer has already visited.
  ///
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-deterministic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode *, unsigned> DomLevels;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AliasSetTracker *AST)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}

  void run();

private:
  // Unordered removal: overwrite the slot with the last element and shrink.
  // AllocaIdx is decremented so the caller's loop revisits the moved-in entry.
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  unsigned getNumPreds(const BasicBlock *BB) {
    // The count is cached biased by +1 so that the DenseMap's default value
    // of 0 can mean "not computed yet" even for blocks with no predecessors.
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
    return NP - 1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
                           SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

} // end of anonymous namespace

/// Erase all non-load/store users of the alloca (lifetime markers and the
/// bitcasts/GEPs feeding them), which isAllocaPromotable guaranteed are the
/// only other kinds of user.
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.

  for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
    // Advance the iterator before erasing so it is not invalidated.
    Instruction *I = cast<Instruction>(*UI);
    ++UI;
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      continue;

    if (!I->getType()->isVoidTy()) {
      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
      // Follow the use/def chain to erase them now instead of leaving it for
      // dead code elimination later.
      for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
        Instruction *Inst = cast<Instruction>(*UUI);
        ++UUI;
        Inst->eraseFromParent();
      }
    }
    I->eraseFromParent();
  }
}

/// \brief Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     DominatorTree &DT,
                                     AliasSetTracker *AST) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1; // Lazily computed LBI index of the store.

  // Clear out UsingBlocks. We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca. We can do this if the value is
    // dominated by the store. If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first. If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      // NOTE(review): the first operand of this condition is always true here
      // (the preceding `if` already ruled out LI->getParent() == StoreBB), so
      // only the dominance check matters.
      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships. If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DIBuilder DIB(*AI->getParent()->getParent()->getParent());
    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
    DDI->eraseFromParent();
    LBI.deleteValue(DDI);
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}

/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// Loads that are not preceded by any store in the block (i.e. the alloca is
/// read before it is written on some path) are simply replaced with undef;
/// the alloca is always fully eliminated by this function. e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     AliasSetTracker *AST) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen. This does not
  // significantly pessimize the small block case. This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
                         less_first());

    if (I == StoresByIndex.begin())
      // If there is no store before this load, the load takes the undef value.
      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
    else
      // Otherwise, there was a store before this load, the load takes its value.
      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));

    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Record debuginfo for the store before removing it.
    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
      DIBuilder DIB(*AI->getParent()->getParent()->getParent());
      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DDI->eraseFromParent();
    LBI.deleteValue(DDI);
  }

  ++NumLocalPromoted;
}

/// Main driver: promote every alloca in Allocas, trying the cheap special
/// cases (dead, single-store, single-block) first and falling back to full
/// SSA construction (PHI placement + renaming) for the rest.
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca. This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      promoteSingleBlockAlloca(AI, Info, LBI, AST);

      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode *, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm. Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be undef values for all of
  // the alloca's. We do this in case there is a load of a value that has not
  // been stored yet. In this case, it will get this undef value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops. Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values. This can
  // happen due to undef values coming into the PHI nodes. This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go. If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        // Post-increment before erase keeps the iterator valid.
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code. Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed. If this is the case, the PHI nodes may not
  // have incoming values for all predecessors. Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values. We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block. Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}

/// \brief Determine which blocks the value is live in.
///
/// These are blocks which lead to uses. Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
    SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live. Blocks are added to the worklist if we need to
  // check their predecessors. Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use. If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value. If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    // This loop has no exit test: BB is known to contain at least one load or
    // store of AI, so one of the breaks below must fire before we run off the
    // end of the block.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load. The alloca is not
        // actually live-in here. Remove it from the worklist by swapping in
        // the last element (order is irrelevant) and re-examining slot i.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI)
          continue;

        // Okay, we found a load before a store to the alloca. It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set. If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too. Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm. Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in. These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards. This is the
  // Sreedhar-Gao linear-time IDF computation described in the file header.
  typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              less_second> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
                                                     E = DefBlocks.end();
       I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode *, 32> Visited;
  SmallVector<DomTreeNode *, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree. Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!Visited.insert(SuccNode))
          continue;

        // Skip blocks where the value is not live-in; a phi there would be
        // dead.
        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        // A new phi is itself a definition; iterate unless SuccBB already
        // defines the value.
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  // Sort by the stable BB numbering so phi insertion order is deterministic.
  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}

/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're done!
  if (PN)
    return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to the
  // BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       BB->begin());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;

  if (AST && PN->getType()->isPointerTy())
    AST->copyValue(PointerAllocaValues[AllocaNo], PN);

  return true;
}

/// \brief Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
///
/// Despite the name, the traversal is iterative: a straight-line successor is
/// followed via 'goto NextIteration' (a manual tail call) and the remaining
/// successors are pushed onto \p Worklist, so deep CFGs don't overflow the
/// call stack.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in
  // the block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed
      // in the IR before mem2reg was run.  We determine that APN is being
      // inserted because it is missing incoming edges.  All other PHI nodes
      // being inserted by this pass of mem2reg will have the same number of
      // incoming operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Pred may reach BB over several CFG edges (e.g. a switch); each edge
      // needs its own incoming entry on every PHI.
      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0)
          break;

        // Verify that it is missing entries.  If not, it is not being
        // inserted by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.  (insert returns true iff BB was newly inserted;
  // the PHI-updating above still runs for already-visited blocks because each
  // incoming edge must contribute its values.)
  if (!Visited.insert(BB))
    return;

  // Rewrite every load/store of a promoted alloca in this block.  The
  // iterator is advanced before I is possibly erased below, keeping II valid.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      // Only rewrite loads of allocas we are actually promoting.
      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of
      // the value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  // Queue the remaining (not-yet-seen) successors; each gets a snapshot of
  // the current IncomingVals as its entry state.
  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  // Tail-call into the first successor without growing the call stack.
  goto NextIteration;
}

/// Promote the given allocas to SSA registers.  Entry point for the mem2reg
/// utility; allocas must satisfy isAllocaPromotable.
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                           AliasSetTracker *AST) {
  // If there is nothing to do, bail out...
  if (Allocas.empty())
    return;

  PromoteMem2Reg(Allocas, DT, AST).run();
}