PromoteMemoryToRegister.cpp revision 2663ffe7512ccd50ca246d5c8d9ee7a87b33883c
//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using dominator frontiers to place PHI nodes, then traversing
// the function in depth-first order to rewrite loads and stores as appropriate.
// This is just the standard SSA construction algorithm to construct "pruned"
// SSA form.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>
using namespace llvm;

// Counters reported under -stats, one per promotion strategy used below:
// single-block linear scan, single-store rewrite, dead-alloca removal, and
// the number of PHI nodes the full SSA-construction path inserts.
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");

// Provide DenseMapInfo for (BasicBlock*, alloca-number) pairs, used as keys
// of the NewPhiNodes map below.
namespace llvm {
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  // Sentinel keys for DenseMap.  They must never collide with a real
  // (block, alloca-number) pair, hence the -1/-2 pointer values.
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    // Combine the pointer hash with the alloca index so different allocas in
    // the same block hash differently.
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca:  no
/// volatile accesses, and the alloca's address itself is never stored or
/// otherwise escapes.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI)     // Loop over all of the uses of the alloca
    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      if (SI->isVolatile())
        return false;
    } else {
      return false;   // Not a load or store.
    }

  return true;
}

namespace {
  struct AllocaInfo;

  // Data package used by RenamePass(): one pending CFG edge (Pred -> BB) to
  // visit, together with the current SSA value of every alloca on that edge.
  class VISIBILITY_HIDDEN RenamePassData {
  public:
    typedef std::vector<Value *> ValVector;

    RenamePassData() {}
    RenamePassData(BasicBlock *B, BasicBlock *P,
                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
    BasicBlock *BB;
    BasicBlock *Pred;
    ValVector Values;

    // Cheaply exchange contents with RHS; used to move worklist entries
    // without copying the (potentially large) Values vector.
    void swap(RenamePassData &RHS) {
      std::swap(BB, RHS.BB);
      std::swap(Pred, RHS.Pred);
      Values.swap(RHS.Values);
    }
  };

  struct VISIBILITY_HIDDEN PromoteMem2Reg {
    /// Allocas - The alloca instructions being promoted.
    ///
    std::vector<AllocaInst*> Allocas;

    /// RetryList - Allocas that could not be promoted by the local algorithms
    /// are appended here so the caller can retry them with the full pass.
    SmallVector<AllocaInst*, 16> &RetryList;
    DominatorTree &DT;
    DominanceFrontier &DF;

    /// AST - An AliasSetTracker object to update.  If null, don't update it.
    ///
    AliasSetTracker *AST;

    /// AllocaLookup - Reverse mapping of Allocas.
    ///
    std::map<AllocaInst*, unsigned> AllocaLookup;

    /// NewPhiNodes - The PhiNodes we're adding, keyed by (block, alloca index).
    ///
    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;

    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
    /// it corresponds to.
    DenseMap<PHINode*, unsigned> PhiToAllocaMap;

    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
    /// each alloca that is of pointer type, we keep track of what to copyValue
    /// to the inserted PHI nodes here.
    ///
    std::vector<Value*> PointerAllocaValues;

    /// Visited - The set of basic blocks the renamer has already visited.
    ///
    SmallPtrSet<BasicBlock*, 16> Visited;

    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
    /// non-determinstic behavior.
    DenseMap<BasicBlock*, unsigned> BBNumbers;

    /// BBNumPreds - Lazily compute the number of predecessors a block has.
    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
  public:
    PromoteMem2Reg(const std::vector<AllocaInst*> &A,
                   SmallVector<AllocaInst*, 16> &Retry, DominatorTree &dt,
                   DominanceFrontier &df, AliasSetTracker *ast)
      : Allocas(A), RetryList(Retry), DT(dt), DF(df), AST(ast) {}

    void run();

    /// properlyDominates - Return true if I1 properly dominates I2.
    ///
    bool properlyDominates(Instruction *I1, Instruction *I2) const {
      // An invoke's result is only usable in its normal destination, so treat
      // the invoke as if it were located there for dominance purposes.
      if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
        I1 = II->getNormalDest()->begin();
      return DT.properlyDominates(I1->getParent(), I2->getParent());
    }

    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
    ///
    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
      return DT.dominates(BB1, BB2);
    }

  private:
    // Remove the alloca at AllocaIdx in O(1) by replacing it with the last
    // element; decrement the caller's index so the loop revisits this slot.
    void RemoveFromAllocasList(unsigned &AllocaIdx) {
      Allocas[AllocaIdx] = Allocas.back();
      Allocas.pop_back();
      --AllocaIdx;
    }

    // Cached predecessor count.  The map value is stored biased by +1 so that
    // a default-constructed 0 means "not computed yet" and a block with zero
    // predecessors can still be cached.
    unsigned getNumPreds(const BasicBlock *BB) {
      unsigned &NP = BBNumPreds[BB];
      if (NP == 0)
        NP = std::distance(pred_begin(BB), pred_end(BB))+1;
      return NP-1;
    }

    void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                 AllocaInfo &Info);
    void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                             const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                             SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info);

    bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI);
    void PromoteLocallyUsedAllocas(BasicBlock *BB,
                                   const std::vector<AllocaInst*> &AIs);

    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                    RenamePassData::ValVector &IncVals,
                    std::vector<RenamePassData> &Worklist);
    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
                      SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
  };

  // Per-alloca use summary collected by AnalyzeAlloca and consumed by the
  // promotion strategies in run().
  struct AllocaInfo {
    std::vector<BasicBlock*> DefiningBlocks;   // Blocks containing a store.
    std::vector<BasicBlock*> UsingBlocks;      // Blocks containing a load.

    StoreInst *OnlyStore;          // The store, if there is exactly one.
    BasicBlock *OnlyBlock;         // The single block with uses, if any.
    bool OnlyUsedInOneBlock;       // True while all uses share one block.

    Value *AllocaPointerVal;       // A value for AliasSetTracker::copyValue.

    void clear() {
      DefiningBlocks.clear();
      UsingBlocks.clear();
      OnlyStore = 0;
      OnlyBlock = 0;
      OnlyUsedInOneBlock = true;
      AllocaPointerVal = 0;
    }

    /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
    /// ivars.  Assumes isAllocaPromotable(AI): every use is a load or store.
    void AnalyzeAlloca(AllocaInst *AI) {
      clear();

      // As we scan the uses of the alloca instruction, keep track of stores,
      // and decide whether all of the loads and stores to the alloca are within
      // the same basic block.
      for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
           U != E; ++U) {
        Instruction *User = cast<Instruction>(*U);
        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
          // Remember the basic blocks which define new values for the alloca
          DefiningBlocks.push_back(SI->getParent());
          AllocaPointerVal = SI->getOperand(0);
          OnlyStore = SI;
        } else {
          LoadInst *LI = cast<LoadInst>(User);
          // Otherwise it must be a load instruction, keep track of variable
          // reads.
          UsingBlocks.push_back(LI->getParent());
          AllocaPointerVal = LI;
        }

        if (OnlyUsedInOneBlock) {
          if (OnlyBlock == 0)
            OnlyBlock = User->getParent();
          else if (OnlyBlock != User->getParent())
            OnlyUsedInOneBlock = false;
        }
      }
    }
  };

}  // end of anonymous namespace


void PromoteMem2Reg::run() {
  Function &F = *DF.getRoot()->getParent();

  // LocallyUsedAllocas - Keep track of all of the alloca instructions which are
  // only used in a single basic block.  These instructions can be efficiently
  // promoted by performing a single linear scan over that one block.  Since
  // individual basic blocks are sometimes large, we group together all allocas
  // that are live in a single basic block by the basic block they are live in.
  std::map<BasicBlock*, std::vector<AllocaInst*> > LocallyUsedAllocas;

  if (AST) PointerAllocaValues.resize(Allocas.size());

  AllocaInfo Info;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      LocallyUsedAllocas[Info.OnlyBlock].push_back(AI);

      // Remove the alloca from the Allocas list, since it will be processed.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.  The numbering keeps PHI subscripts deterministic regardless of the
    // iteration order of dominance-frontier sets.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need phi
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  // Process all allocas which are only used in a single basic block.
  for (std::map<BasicBlock*, std::vector<AllocaInst*> >::iterator I =
         LocallyUsedAllocas.begin(), E = LocallyUsedAllocas.end(); I != E; ++I){
    const std::vector<AllocaInst*> &LocAllocas = I->second;
    assert(!LocAllocas.empty() && "empty alloca list??");

    // It's common for there to only be one alloca in the list.  Handle it
    // efficiently.
    if (LocAllocas.size() == 1) {
      // If we can do the quick promotion pass, do so now.
      if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0]))
        RetryList.push_back(LocAllocas[0]);  // Failed, retry later.
    } else {
      // Locally promote anything possible.  Note that if this is unable to
      // promote a particular alloca, it puts the alloca onto the Allocas vector
      // for global processing.
      PromoteLocallyUsedAllocas(I->first, LocAllocas);
    }
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  while (!RenamePassWorkList.empty()) {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  }

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // sections of dead code that were not processed on the dominance frontier.
    // Just delete the users now.
    //
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }


  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = PN->hasConstantValue(true)) {
        // Only safe to replace if V dominates the PHI (or is not an
        // instruction at all, e.g. a constant or argument).
        if (!isa<Instruction>(V) ||
            properlyDominates(cast<Instruction>(V), PN)) {
          if (AST && isa<PointerType>(PN->getType()))
            AST->deleteValue(PN);
          PN->replaceAllUsesWith(V);
          PN->eraseFromParent();
          // Post-increment keeps the iterator valid across the erase.
          NewPhiNodes.erase(I++);
          EliminatedAPHI = true;
          continue;
        }
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if there the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
  LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
                             Info.UsingBlocks.begin(), Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    // The scan needs no end check: BB is in UsingBlocks, so it is guaranteed to
    // contain at least one load of AI.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Remove BB from the worklist in O(1) by
        // swapping in the last element; back up i/e so the swapped-in block
        // is still examined.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.back();
    LiveInBlockWorklist.pop_back();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it to.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {

  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Compute the locations where PhiNodes need to be inserted.  Look at the
  // dominance frontier of EACH basic-block we have a write in.  This computes
  // the iterated dominance frontier: newly inserted PHIs are themselves defs,
  // so their blocks are pushed back onto DefiningBlocks and re-processed.
  unsigned CurrentVersion = 0;
  SmallPtrSet<PHINode*, 16> InsertedPHINodes;
  std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
  while (!Info.DefiningBlocks.empty()) {
    BasicBlock *BB = Info.DefiningBlocks.back();
    Info.DefiningBlocks.pop_back();

    // Look up the DF for this write, add it to defining blocks.
    DominanceFrontier::const_iterator it = DF.find(BB);
    if (it == DF.end()) continue;

    const DominanceFrontier::DomSetType &S = it->second;

    // In theory we don't need the indirection through the DFBlocks vector.
    // In practice, the order of calling QueuePhiNode would depend on the
    // (unspecified) ordering of basic blocks in the dominance frontier,
    // which would give PHI nodes non-determinstic subscripts.  Fix this by
    // processing blocks in order of the occurance in the function.
    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
         PE = S.end(); P != PE; ++P) {
      // If the frontier block is not in the live-in set for the alloca, don't
      // bother processing it.
      if (!LiveInBlocks.count(*P))
        continue;

      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
    }

    // Sort by which the block ordering in the function.
    if (DFBlocks.size() > 1)
      std::sort(DFBlocks.begin(), DFBlocks.end());

    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
      BasicBlock *BB = DFBlocks[i].second;
      // A newly placed PHI is a new definition; iterate on its block.
      if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
        Info.DefiningBlocks.push_back(BB);
    }
    DFBlocks.clear();
  }
}


/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
638void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 639 AllocaInfo &Info) { 640 StoreInst *OnlyStore = Info.OnlyStore; 641 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 642 643 // Be aware of loads before the store. 644 SmallPtrSet<BasicBlock*, 32> ProcessedBlocks; 645 for (unsigned i = 0, e = Info.UsingBlocks.size(); i != e; ++i) { 646 BasicBlock *UseBlock = Info.UsingBlocks[i]; 647 648 // If we already processed this block, don't reprocess it. 649 if (!ProcessedBlocks.insert(UseBlock)) { 650 Info.UsingBlocks[i] = Info.UsingBlocks.back(); 651 Info.UsingBlocks.pop_back(); 652 --i; --e; 653 continue; 654 } 655 656 // If the store dominates the block and if we haven't processed it yet, 657 // do so now. We can't handle the case where the store doesn't dominate a 658 // block because there may be a path between the store and the use, but we 659 // may need to insert phi nodes to handle dominance properly. 660 if (!StoringGlobalVal && !dominates(OnlyStore->getParent(), UseBlock)) 661 continue; 662 663 // If the use and store are in the same block, do a quick scan to 664 // verify that there are no uses before the store. 665 if (UseBlock == OnlyStore->getParent()) { 666 BasicBlock::iterator I = UseBlock->begin(); 667 for (; &*I != OnlyStore; ++I) { // scan block for store. 668 if (isa<LoadInst>(I) && I->getOperand(0) == AI) 669 break; 670 } 671 if (&*I != OnlyStore) 672 continue; // Do not promote the uses of this in this block. 673 } 674 675 // Otherwise, if this is a different block or if all uses happen 676 // after the store, do a simple linear scan to replace loads with 677 // the stored value. 
678 for (BasicBlock::iterator I = UseBlock->begin(), E = UseBlock->end(); 679 I != E; ) { 680 if (LoadInst *LI = dyn_cast<LoadInst>(I++)) { 681 if (LI->getOperand(0) == AI) { 682 LI->replaceAllUsesWith(OnlyStore->getOperand(0)); 683 if (AST && isa<PointerType>(LI->getType())) 684 AST->deleteValue(LI); 685 LI->eraseFromParent(); 686 } 687 } 688 } 689 690 // Finally, remove this block from the UsingBlock set. 691 Info.UsingBlocks[i] = Info.UsingBlocks.back(); 692 Info.UsingBlocks.pop_back(); 693 --i; --e; 694 } 695} 696 697 698/// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic 699/// block. If this is the case, avoid traversing the CFG and inserting a lot of 700/// potentially useless PHI nodes by just performing a single linear pass over 701/// the basic block using the Alloca. 702/// 703/// If we cannot promote this alloca (because it is read before it is written), 704/// return true. This is necessary in cases where, due to control flow, the 705/// alloca is potentially undefined on some control flow paths. e.g. code like 706/// this is potentially correct: 707/// 708/// for (...) { if (c) { A = undef; undef = B; } } 709/// 710/// ... so long as A is not used before undef is set. 711/// 712bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI) { 713 assert(!AI->use_empty() && "There are no uses of the alloca!"); 714 715 // Handle degenerate cases quickly. 716 if (AI->hasOneUse()) { 717 Instruction *U = cast<Instruction>(AI->use_back()); 718 if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 719 // Must be a load of uninitialized value. 720 LI->replaceAllUsesWith(UndefValue::get(AI->getAllocatedType())); 721 if (AST && isa<PointerType>(LI->getType())) 722 AST->deleteValue(LI); 723 } else { 724 // Otherwise it must be a store which is never read. 725 assert(isa<StoreInst>(U)); 726 } 727 BB->getInstList().erase(U); 728 } else { 729 // Uses of the uninitialized memory location shall get undef. 
730 Value *CurVal = 0; 731 732 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 733 Instruction *Inst = I++; 734 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 735 if (LI->getOperand(0) == AI) { 736 if (!CurVal) return true; // Could not locally promote! 737 738 // Loads just returns the "current value"... 739 LI->replaceAllUsesWith(CurVal); 740 if (AST && isa<PointerType>(LI->getType())) 741 AST->deleteValue(LI); 742 BB->getInstList().erase(LI); 743 } 744 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 745 if (SI->getOperand(1) == AI) { 746 // Store updates the "current value"... 747 CurVal = SI->getOperand(0); 748 BB->getInstList().erase(SI); 749 } 750 } 751 } 752 } 753 754 // After traversing the basic block, there should be no more uses of the 755 // alloca: remove it now. 756 assert(AI->use_empty() && "Uses of alloca from more than one BB??"); 757 if (AST) AST->deleteValue(AI); 758 AI->eraseFromParent(); 759 760 ++NumLocalPromoted; 761 return false; 762} 763 764/// PromoteLocallyUsedAllocas - This method is just like 765/// PromoteLocallyUsedAlloca, except that it processes multiple alloca 766/// instructions in parallel. This is important in cases where we have large 767/// basic blocks, as we don't want to rescan the entire basic block for each 768/// alloca which is locally used in it (which might be a lot). 769void PromoteMem2Reg:: 770PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector<AllocaInst*> &AIs) { 771 DenseMap<AllocaInst*, Value*> CurValues; 772 for (unsigned i = 0, e = AIs.size(); i != e; ++i) 773 CurValues[AIs[i]] = 0; // Insert with null value 774 775 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 776 Instruction *Inst = I++; 777 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 778 // Is this a load of an alloca we are tracking? 
779 if (AllocaInst *AI = dyn_cast<AllocaInst>(LI->getOperand(0))) { 780 DenseMap<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI); 781 if (AIt != CurValues.end()) { 782 // If loading an uninitialized value, allow the inter-block case to 783 // handle it. Due to control flow, this might actually be ok. 784 if (AIt->second == 0) { // Use of locally uninitialized value?? 785 RetryList.push_back(AI); // Retry elsewhere. 786 CurValues.erase(AIt); // Stop tracking this here. 787 if (CurValues.empty()) return; 788 } else { 789 // Loads just returns the "current value"... 790 LI->replaceAllUsesWith(AIt->second); 791 if (AST && isa<PointerType>(LI->getType())) 792 AST->deleteValue(LI); 793 BB->getInstList().erase(LI); 794 } 795 } 796 } 797 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 798 if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1))) { 799 DenseMap<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI); 800 if (AIt != CurValues.end()) { 801 // Store updates the "current value"... 802 AIt->second = SI->getOperand(0); 803 SI->eraseFromParent(); 804 } 805 } 806 } 807 } 808 809 // At the end of the block scan, all allocas in CurValues are dead. 810 for (DenseMap<AllocaInst*, Value*>::iterator I = CurValues.begin(), 811 E = CurValues.end(); I != E; ++I) { 812 AllocaInst *AI = I->first; 813 assert(AI->use_empty() && "Uses of alloca from more than one BB??"); 814 if (AST) AST->deleteValue(AI); 815 AI->eraseFromParent(); 816 } 817 818 NumLocalPromoted += CurValues.size(); 819} 820 821 822 823// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 824// Alloca returns true if there wasn't already a phi-node for that variable 825// 826bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 827 unsigned &Version, 828 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 829 // Look up the basic-block in question. 
830 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 831 832 // If the BB already has a phi node added for the i'th alloca then we're done! 833 if (PN) return false; 834 835 // Create a PhiNode using the dereferenced type... and add the phi-node to the 836 // BasicBlock. 837 PN = new PHINode(Allocas[AllocaNo]->getAllocatedType(), 838 Allocas[AllocaNo]->getName() + "." + 839 utostr(Version++), BB->begin()); 840 ++NumPHIInsert; 841 PhiToAllocaMap[PN] = AllocaNo; 842 PN->reserveOperandSpace(getNumPreds(BB)); 843 844 InsertedPHINodes.insert(PN); 845 846 if (AST && isa<PointerType>(PN->getType())) 847 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 848 849 return true; 850} 851 852// RenamePass - Recursively traverse the CFG of the function, renaming loads and 853// stores to the allocas which we are promoting. IncomingVals indicates what 854// value each Alloca contains on exit from the predecessor block Pred. 855// 856void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 857 RenamePassData::ValVector &IncomingVals, 858 std::vector<RenamePassData> &Worklist) { 859NextIteration: 860 // If we are inserting any phi nodes into this BB, they will already be in the 861 // block. 862 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 863 // Pred may have multiple edges to BB. If so, we want to add N incoming 864 // values to each PHI we are inserting on the first time we see the edge. 865 // Check to see if APN already has incoming values from Pred. This also 866 // prevents us from modifying PHI nodes that are not currently being 867 // inserted. 868 bool HasPredEntries = false; 869 for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) { 870 if (APN->getIncomingBlock(i) == Pred) { 871 HasPredEntries = true; 872 break; 873 } 874 } 875 876 // If we have PHI nodes to update, compute the number of edges from Pred to 877 // BB. 
    if (!HasPredEntries) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Count how many of Pred's terminator successors are BB: each such edge
      // needs its own incoming entry on every PHI we are inserting.
      TerminatorInst *PredTerm = Pred->getTerminator();
      unsigned NumEdges = 0;
      for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
        if (PredTerm->getSuccessor(i) == BB)
          ++NumEdges;
      }
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  // Bail if this block was already renamed (insert returns false on a
  // duplicate).
  if (!Visited.insert(BB)) return;

  // Rewrite every load/store of a promoted alloca in this block, stopping at
  // the terminator.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;

      std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && isa<PointerType>(LI->getType()))
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;

      std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  TerminatorInst *TI = BB->getTerminator();
  unsigned NumSuccs = TI->getNumSuccessors();
  if (NumSuccs == 0) return;

  // Add all-but-one successor to the worklist.
  for (unsigned i = 0; i != NumSuccs-1; i++)
    Worklist.push_back(RenamePassData(TI->getSuccessor(i), BB, IncomingVals));

  // Handle the last successor without using the worklist.  This allows us to
  // handle unconditional branches directly, for example.
  Pred = BB;
  BB = TI->getSuccessor(NumSuccs-1);
  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function makes
/// use of DominanceFrontier information.
This function does not modify the CFG 973/// of the function at all. All allocas must be from the same function. 974/// 975/// If AST is specified, the specified tracker is updated to reflect changes 976/// made to the IR. 977/// 978void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 979 DominatorTree &DT, DominanceFrontier &DF, 980 AliasSetTracker *AST) { 981 // If there is nothing to do, bail out... 982 if (Allocas.empty()) return; 983 984 SmallVector<AllocaInst*, 16> RetryList; 985 PromoteMem2Reg(Allocas, RetryList, DT, DF, AST).run(); 986 987 // PromoteMem2Reg may not have been able to promote all of the allocas in one 988 // pass, run it again if needed. 989 std::vector<AllocaInst*> NewAllocas; 990 while (!RetryList.empty()) { 991 // If we need to retry some allocas, this is due to there being no store 992 // before a read in a local block. To counteract this, insert a store of 993 // undef into the alloca right after the alloca itself. 994 for (unsigned i = 0, e = RetryList.size(); i != e; ++i) { 995 BasicBlock::iterator BBI = RetryList[i]; 996 997 new StoreInst(UndefValue::get(RetryList[i]->getAllocatedType()), 998 RetryList[i], ++BBI); 999 } 1000 1001 NewAllocas.assign(RetryList.begin(), RetryList.end()); 1002 RetryList.clear(); 1003 PromoteMem2Reg(NewAllocas, RetryList, DT, DF, AST).run(); 1004 NewAllocas.clear(); 1005 } 1006} 1007