PromoteMemoryToRegister.cpp revision b8c564f72ef75c775ce473e7b450d5c4da933e89
1//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file promotes memory references to be register references. It promotes 11// alloca instructions which only have loads and stores as uses. An alloca is 12// transformed by using dominator frontiers to place PHI nodes, then traversing 13// the function in depth-first order to rewrite loads and stores as appropriate. 14// This is just the standard SSA construction algorithm to construct "pruned" 15// SSA form. 16// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "mem2reg" 20#include "llvm/Transforms/Utils/PromoteMemToReg.h" 21#include "llvm/Constants.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/Function.h" 24#include "llvm/Instructions.h" 25#include "llvm/IntrinsicInst.h" 26#include "llvm/Analysis/Dominators.h" 27#include "llvm/Analysis/AliasSetTracker.h" 28#include "llvm/ADT/DenseMap.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallVector.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/ADT/StringExtras.h" 33#include "llvm/ADT/STLExtras.h" 34#include "llvm/Support/CFG.h" 35#include "llvm/Support/Compiler.h" 36#include <algorithm> 37using namespace llvm; 38 39STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); 40STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); 41STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); 42STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); 43 44// Provide DenseMapInfo for all pointers. 
namespace llvm {
// DenseMapInfo for the (BasicBlock, AllocaNumber) pairs used as keys of the
// NewPhiNodes map below.  The empty/tombstone keys use pointer values (-1/-2)
// that can never be real BasicBlock addresses.
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    // Combine the pointer hash with the alloca index; *2 keeps pairs with the
    // same block but different indices from trivially colliding.
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
} // end namespace llvm

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI)   // Loop over all of the uses of the alloca
    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      if (SI->isVolatile())
        return false;
    } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) {
      // Uses by dbg info shouldn't inhibit promotion.  A bitcast whose only
      // user is a debug-info intrinsic is tolerated (and later deleted by
      // AnalyzeAlloca); any other bitcast use blocks promotion.
      if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin()))
        return false;
    } else {
      return false;
    }

  return true;
}

namespace {
  struct AllocaInfo;

  // Data package used by RenamePass().  Each entry represents one block to
  // visit: the block itself, the predecessor we reached it from, and the
  // current SSA value of every alloca on entry along that edge.
  class VISIBILITY_HIDDEN RenamePassData {
  public:
    typedef std::vector<Value *> ValVector;

    RenamePassData() {}
    RenamePassData(BasicBlock *B, BasicBlock *P,
                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
    BasicBlock *BB;
    BasicBlock *Pred;
    ValVector Values;

    // swap is used when popping from the worklist to avoid copying the
    // (potentially large) Values vector.
    void swap(RenamePassData &RHS) {
      std::swap(BB, RHS.BB);
      std::swap(Pred, RHS.Pred);
      Values.swap(RHS.Values);
    }
  };

  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
  /// load/store instructions in the block that directly load or store an
  /// alloca.
  ///
  /// This functionality is important because it avoids scanning large basic
  /// blocks multiple times when promoting many allocas in the same block.
  class VISIBILITY_HIDDEN LargeBlockInfo {
    /// InstNumbers - For each instruction that we track, keep the index of the
    /// instruction.  The index starts out as the number of the instruction from
    /// the start of the block.
    DenseMap<const Instruction *, unsigned> InstNumbers;
  public:

    /// isInterestingInstruction - This code only looks at accesses to allocas.
    static bool isInterestingInstruction(const Instruction *I) {
      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
    }

    /// getInstructionIndex - Get or calculate the index of the specified
    /// instruction.  On a cache miss this numbers *every* interesting
    /// instruction in the block in one pass, so later queries are O(1).
    unsigned getInstructionIndex(const Instruction *I) {
      assert(isInterestingInstruction(I) &&
             "Not a load/store to/from an alloca?");

      // If we already have this instruction number, return it.
      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
      if (It != InstNumbers.end()) return It->second;

      // Scan the whole block to get the instruction.  This accumulates
      // information for every interesting instruction in the block, in order to
      // avoid gratuitous rescans.
      const BasicBlock *BB = I->getParent();
      unsigned InstNo = 0;
      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
           BBI != E; ++BBI)
        if (isInterestingInstruction(BBI))
          InstNumbers[BBI] = InstNo++;
      It = InstNumbers.find(I);

      assert(It != InstNumbers.end() && "Didn't insert instruction?");
      return It->second;
    }

    void deleteValue(const Instruction *I) {
      InstNumbers.erase(I);
    }

    void clear() {
      InstNumbers.clear();
    }
  };

  // PromoteMem2Reg - the promotion state machine.  Drives the whole
  // transformation: trivial-case fast paths, PHI placement via dominance
  // frontiers, and the SSA rename walk.
  struct VISIBILITY_HIDDEN PromoteMem2Reg {
    /// Allocas - The alloca instructions being promoted.
    ///
    std::vector<AllocaInst*> Allocas;
    DominatorTree &DT;
    DominanceFrontier &DF;

    /// AST - An AliasSetTracker object to update.  If null, don't update it.
    ///
    AliasSetTracker *AST;

    /// AllocaLookup - Reverse mapping of Allocas.
    ///
    std::map<AllocaInst*, unsigned> AllocaLookup;

    /// NewPhiNodes - The PhiNodes we're adding.  Keyed by (block, alloca
    /// index); uses the DenseMapInfo specialization above.
    ///
    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;

    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
    /// it corresponds to.
    DenseMap<PHINode*, unsigned> PhiToAllocaMap;

    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
    /// each alloca that is of pointer type, we keep track of what to copyValue
    /// to the inserted PHI nodes here.
    ///
    std::vector<Value*> PointerAllocaValues;

    /// Visited - The set of basic blocks the renamer has already visited.
    ///
    SmallPtrSet<BasicBlock*, 16> Visited;

    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
    /// non-deterministic behavior.
    DenseMap<BasicBlock*, unsigned> BBNumbers;

    /// BBNumPreds - Lazily compute the number of predecessors a block has.
    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
  public:
    PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                   DominanceFrontier &df, AliasSetTracker *ast)
      : Allocas(A), DT(dt), DF(df), AST(ast) {}

    void run();

    /// properlyDominates - Return true if I1 properly dominates I2.
    ///
    bool properlyDominates(Instruction *I1, Instruction *I2) const {
      // An invoke's result is only usable in its normal destination, so for
      // dominance purposes treat it as the first instruction of that block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
        I1 = II->getNormalDest()->begin();
      return DT.properlyDominates(I1->getParent(), I2->getParent());
    }

    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
    ///
    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
      return DT.dominates(BB1, BB2);
    }

  private:
    // Swap-with-back removal; decrements AllocaIdx so the caller's loop
    // revisits the slot now holding the (moved) last element.
    void RemoveFromAllocasList(unsigned &AllocaIdx) {
      Allocas[AllocaIdx] = Allocas.back();
      Allocas.pop_back();
      --AllocaIdx;
    }

    // Cached predecessor count.  Stored biased by +1 so that a default-
    // constructed DenseMap value of 0 means "not yet computed".
    unsigned getNumPreds(const BasicBlock *BB) {
      unsigned &NP = BBNumPreds[BB];
      if (NP == 0)
        NP = std::distance(pred_begin(BB), pred_end(BB))+1;
      return NP-1;
    }

    void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                 AllocaInfo &Info);
    void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                             const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                             SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);
    void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);


    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                    RenamePassData::ValVector &IncVals,
                    std::vector<RenamePassData> &Worklist);
    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
                      SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
  };

  // AllocaInfo - Per-alloca analysis results, reused (via clear()) across the
  // allocas being promoted.
  struct AllocaInfo {
    std::vector<BasicBlock*> DefiningBlocks;   // Blocks containing a store.
    std::vector<BasicBlock*> UsingBlocks;      // Blocks containing a load.

    StoreInst *OnlyStore;       // The last store seen (the unique one if the
                                // alloca has a single store).
    BasicBlock *OnlyBlock;
    bool OnlyUsedInOneBlock;

    Value *AllocaPointerVal;    // Some value loaded from or stored to the
                                // alloca, for AliasSetTracker updating.

    void clear() {
      DefiningBlocks.clear();
      UsingBlocks.clear();
      OnlyStore = 0;
      OnlyBlock = 0;
      OnlyUsedInOneBlock = true;
      AllocaPointerVal = 0;
    }

    /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
    /// ivars.  Also strips off any debug-info intrinsic uses (through a
    /// bitcast) as a side effect.
    void AnalyzeAlloca(AllocaInst *AI) {
      clear();

      // As we scan the uses of the alloca instruction, keep track of stores,
      // and decide whether all of the loads and stores to the alloca are within
      // the same basic block.
      for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
           U != E;) {
        Instruction *User = cast<Instruction>(*U);
        // Advance before possibly erasing User's bitcast use below.
        ++U;
        if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
          // Remove any uses of this alloca in DbgInfoIntrinsics.
          // isAllocaPromotable already guaranteed this shape.
          assert(BC->hasOneUse() && "Unexpected alloca uses!");
          DbgInfoIntrinsic *DI = cast<DbgInfoIntrinsic>(*BC->use_begin());
          DI->eraseFromParent();
          BC->eraseFromParent();
          continue;
        } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
          // Remember the basic blocks which define new values for the alloca
          DefiningBlocks.push_back(SI->getParent());
          AllocaPointerVal = SI->getOperand(0);
          OnlyStore = SI;
        } else {
          LoadInst *LI = cast<LoadInst>(User);
          // Otherwise it must be a load instruction, keep track of variable
          // reads.
          UsingBlocks.push_back(LI->getParent());
          AllocaPointerVal = LI;
        }

        if (OnlyUsedInOneBlock) {
          if (OnlyBlock == 0)
            OnlyBlock = User->getParent();
          else if (OnlyBlock != User->getParent())
            OnlyUsedInOneBlock = false;
        }
      }
    }
  };
} // end of anonymous namespace


// run - The driver.  For each alloca: try the trivial fast paths (dead,
// single-store, single-block), otherwise place PHI nodes via the dominance
// frontier; then do one rename walk over the CFG for all remaining allocas
// at once, and finally clean up trivial PHIs and patch up PHIs in
// unreachable-pred situations.
void PromoteMem2Reg::run() {
  Function &F = *DF.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.  This gives a deterministic ordering independent of pointer values.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();


  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  while (!RenamePassWorkList.empty()) {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());   // swap, not copy, the value vector
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  }

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // sections of dead code that were not processed on the dominance frontier.
    // Just delete the users now.
    //
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }


  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = PN->hasConstantValue(true)) {
        // Only safe to replace if the value's definition dominates the PHI
        // (non-instructions like constants/arguments always dominate).
        if (!isa<Instruction>(V) ||
            properlyDominates(cast<Instruction>(V), PN)) {
          if (AST && isa<PointerType>(PN->getType()))
            AST->deleteValue(PN);
          PN->replaceAllUsesWith(V);
          PN->eraseFromParent();
          NewPhiNodes.erase(I++);   // post-increment keeps the iterator valid
          EliminatedAPHI = true;
          continue;
        }
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
  LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
                             Info.UsingBlocks.begin(), Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    // Note: this loop has no end-of-block test; it relies on BB containing at
    // least one load or store of AI (which is how it got on the worklist).
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Swap-with-back removal; adjust i/e so the
        // moved element is revisited.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.back();
    LiveInBlockWorklist.pop_back();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {

  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Compute the locations where PhiNodes need to be inserted.  Look at the
  // dominance frontier of EACH basic-block we have a write in.  This is the
  // iterated dominance frontier: newly-inserted PHIs are new definitions and
  // are pushed back onto DefiningBlocks for another round.
  unsigned CurrentVersion = 0;
  SmallPtrSet<PHINode*, 16> InsertedPHINodes;
  std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
  while (!Info.DefiningBlocks.empty()) {
    BasicBlock *BB = Info.DefiningBlocks.back();
    Info.DefiningBlocks.pop_back();

    // Look up the DF for this write, add it to defining blocks.
    DominanceFrontier::const_iterator it = DF.find(BB);
    if (it == DF.end()) continue;

    const DominanceFrontier::DomSetType &S = it->second;

    // In theory we don't need the indirection through the DFBlocks vector.
    // In practice, the order of calling QueuePhiNode would depend on the
    // (unspecified) ordering of basic blocks in the dominance frontier,
    // which would give PHI nodes non-deterministic subscripts.  Fix this by
    // processing blocks in order of their occurrence in the function.
    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
         PE = S.end(); P != PE; ++P) {
      // If the frontier block is not in the live-in set for the alloca, don't
      // bother processing it.
      if (!LiveInBlocks.count(*P))
        continue;

      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
    }

    // Sort by the block ordering in the function.
    if (DFBlocks.size() > 1)
      std::sort(DFBlocks.begin(), DFBlocks.end());

    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
      BasicBlock *BB = DFBlocks[i].second;
      // A newly-inserted PHI is a new definition: iterate on its block.
      if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
        Info.DefiningBlocks.push_back(BB);
    }
    DFBlocks.clear();
  }
}

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
699void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 700 AllocaInfo &Info, 701 LargeBlockInfo &LBI) { 702 StoreInst *OnlyStore = Info.OnlyStore; 703 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 704 BasicBlock *StoreBB = OnlyStore->getParent(); 705 int StoreIndex = -1; 706 707 // Clear out UsingBlocks. We will reconstruct it here if needed. 708 Info.UsingBlocks.clear(); 709 710 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 711 Instruction *UserInst = cast<Instruction>(*UI++); 712 if (!isa<LoadInst>(UserInst)) { 713 assert(UserInst == OnlyStore && "Should only have load/stores"); 714 continue; 715 } 716 LoadInst *LI = cast<LoadInst>(UserInst); 717 718 // Okay, if we have a load from the alloca, we want to replace it with the 719 // only value stored to the alloca. We can do this if the value is 720 // dominated by the store. If not, we use the rest of the mem2reg machinery 721 // to insert the phi nodes as needed. 722 if (!StoringGlobalVal) { // Non-instructions are always dominated. 723 if (LI->getParent() == StoreBB) { 724 // If we have a use that is in the same block as the store, compare the 725 // indices of the two instructions to see which one came first. If the 726 // load came before the store, we can't handle it. 727 if (StoreIndex == -1) 728 StoreIndex = LBI.getInstructionIndex(OnlyStore); 729 730 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 731 // Can't handle this load, bail out. 732 Info.UsingBlocks.push_back(StoreBB); 733 continue; 734 } 735 736 } else if (LI->getParent() != StoreBB && 737 !dominates(StoreBB, LI->getParent())) { 738 // If the load and store are in different blocks, use BB dominance to 739 // check their relationships. If the store doesn't dom the use, bail 740 // out. 741 Info.UsingBlocks.push_back(LI->getParent()); 742 continue; 743 } 744 } 745 746 // Otherwise, we *can* safely rewrite this load. 
747 LI->replaceAllUsesWith(OnlyStore->getOperand(0)); 748 if (AST && isa<PointerType>(LI->getType())) 749 AST->deleteValue(LI); 750 LI->eraseFromParent(); 751 LBI.deleteValue(LI); 752 } 753} 754 755 756/// StoreIndexSearchPredicate - This is a helper predicate used to search by the 757/// first element of a pair. 758struct StoreIndexSearchPredicate { 759 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 760 const std::pair<unsigned, StoreInst*> &RHS) { 761 return LHS.first < RHS.first; 762 } 763}; 764 765/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 766/// block. If this is the case, avoid traversing the CFG and inserting a lot of 767/// potentially useless PHI nodes by just performing a single linear pass over 768/// the basic block using the Alloca. 769/// 770/// If we cannot promote this alloca (because it is read before it is written), 771/// return true. This is necessary in cases where, due to control flow, the 772/// alloca is potentially undefined on some control flow paths. e.g. code like 773/// this is potentially correct: 774/// 775/// for (...) { if (c) { A = undef; undef = B; } } 776/// 777/// ... so long as A is not used before undef is set. 778/// 779void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 780 LargeBlockInfo &LBI) { 781 // The trickiest case to handle is when we have large blocks. Because of this, 782 // this code is optimized assuming that large blocks happen. This does not 783 // significantly pessimize the small block case. This uses LargeBlockInfo to 784 // make it efficient to get the index of various operations in the block. 785 786 // Clear out UsingBlocks. We will reconstruct it here if needed. 787 Info.UsingBlocks.clear(); 788 789 // Walk the use-def list of the alloca, getting the locations of all stores. 
790 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 791 StoresByIndexTy StoresByIndex; 792 793 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 794 UI != E; ++UI) 795 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 796 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 797 798 // If there are no stores to the alloca, just replace any loads with undef. 799 if (StoresByIndex.empty()) { 800 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 801 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 802 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 803 if (AST && isa<PointerType>(LI->getType())) 804 AST->deleteValue(LI); 805 LBI.deleteValue(LI); 806 LI->eraseFromParent(); 807 } 808 return; 809 } 810 811 // Sort the stores by their index, making it efficient to do a lookup with a 812 // binary search. 813 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 814 815 // Walk all of the loads from this alloca, replacing them with the nearest 816 // store above them, if any. 817 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 818 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 819 if (!LI) continue; 820 821 unsigned LoadIdx = LBI.getInstructionIndex(LI); 822 823 // Find the nearest store that has a lower than this load. 824 StoresByIndexTy::iterator I = 825 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 826 std::pair<unsigned, StoreInst*>(LoadIdx, 0), 827 StoreIndexSearchPredicate()); 828 829 // If there is no store before this load, then we can't promote this load. 830 if (I == StoresByIndex.begin()) { 831 // Can't handle this load, bail out. 832 Info.UsingBlocks.push_back(LI->getParent()); 833 continue; 834 } 835 836 // Otherwise, there was a store before this load, the load takes its value. 
837 --I; 838 LI->replaceAllUsesWith(I->second->getOperand(0)); 839 if (AST && isa<PointerType>(LI->getType())) 840 AST->deleteValue(LI); 841 LI->eraseFromParent(); 842 LBI.deleteValue(LI); 843 } 844} 845 846 847// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 848// Alloca returns true if there wasn't already a phi-node for that variable 849// 850bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 851 unsigned &Version, 852 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 853 // Look up the basic-block in question. 854 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 855 856 // If the BB already has a phi node added for the i'th alloca then we're done! 857 if (PN) return false; 858 859 // Create a PhiNode using the dereferenced type... and add the phi-node to the 860 // BasicBlock. 861 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 862 Allocas[AllocaNo]->getName() + "." + 863 utostr(Version++), BB->begin()); 864 ++NumPHIInsert; 865 PhiToAllocaMap[PN] = AllocaNo; 866 PN->reserveOperandSpace(getNumPreds(BB)); 867 868 InsertedPHINodes.insert(PN); 869 870 if (AST && isa<PointerType>(PN->getType())) 871 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 872 873 return true; 874} 875 876// RenamePass - Recursively traverse the CFG of the function, renaming loads and 877// stores to the allocas which we are promoting. IncomingVals indicates what 878// value each Alloca contains on exit from the predecessor block Pred. 879// 880void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 881 RenamePassData::ValVector &IncomingVals, 882 std::vector<RenamePassData> &Worklist) { 883NextIteration: 884 // If we are inserting any phi nodes into this BB, they will already be in the 885 // block. 886 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 887 // Pred may have multiple edges to BB. If so, we want to add N incoming 888 // values to each PHI we are inserting on the first time we see the edge. 
    // Check to see if APN already has incoming values from Pred.  This also
    // prevents us from modifying PHI nodes that are not currently being
    // inserted.
    bool HasPredEntries = false;
    for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) {
      if (APN->getIncomingBlock(i) == Pred) {
        HasPredEntries = true;
        break;
      }
    }

    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (!HasPredEntries) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Count the edges from Pred to BB (e.g. a switch may reach BB through
      // several case edges, and each edge needs its own PHI entry).
      unsigned NumEdges = 0;
      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
        if (*I == BB)
          ++NumEdges;
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.  Newly inserted PHIs are contiguous at
      // the top of the block, so walk until we fall off them.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  // insert() returns false if BB was already in the set; PHI entries above
  // were still added for this Pred->BB edge before bailing.
  if (!Visited.insert(BB)) return;

  // Rewrite every load/store of a promoted alloca in this block, tracking the
  // current SSA value of each alloca in IncomingVals.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;

      // Only rewrite loads of allocas that are actually being promoted.
      std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && isa<PointerType>(LI->getType()))
        AST->deleteValue(LI);         // Keep the alias-set tracker in sync.
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;

      std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) return;

  // Handle the last successor without using the worklist.  This allows us to
  // handle unconditional branches directly, for example.
  --E;
  // All successors except the last get queued with a *copy* of IncomingVals;
  // the last reuses this frame via the goto (manual tail call), avoiding one
  // copy of the value vector per block.
  for (; I != E; ++I)
    Worklist.push_back(RenamePassData(*I, BB, IncomingVals));

  Pred = BB;
  BB = *I;
  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function makes
/// use of DominanceFrontier information.  This function does not modify the CFG
/// of the function at all.  All allocas must be from the same function.
995/// 996/// If AST is specified, the specified tracker is updated to reflect changes 997/// made to the IR. 998/// 999void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1000 DominatorTree &DT, DominanceFrontier &DF, 1001 AliasSetTracker *AST) { 1002 // If there is nothing to do, bail out... 1003 if (Allocas.empty()) return; 1004 1005 PromoteMem2Reg(Allocas, DT, DF, AST).run(); 1006} 1007