ScalarReplAggregates.cpp revision fd93908ae8b9684fe71c239e3c6cfe13ff6a2663
1//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This transformation implements the well known scalar replacement of 11// aggregates transformation. This xform breaks up alloca instructions of 12// aggregate type (structure or array) into individual alloca instructions for 13// each member (if possible). Then, if possible, it transforms the individual 14// alloca instructions into nice clean scalar SSA form. 15// 16// This combines a simple SRoA algorithm with the Mem2Reg algorithm because 17// often interact, especially for C++ programs. As such, iterating between 18// SRoA, then Mem2Reg until we run out of things to promote works well. 19// 20//===----------------------------------------------------------------------===// 21 22#include "llvm/Transforms/Scalar.h" 23#include "llvm/Constants.h" 24#include "llvm/DerivedTypes.h" 25#include "llvm/Function.h" 26#include "llvm/Pass.h" 27#include "llvm/Instructions.h" 28#include "llvm/Analysis/Dominators.h" 29#include "llvm/Support/GetElementPtrTypeIterator.h" 30#include "llvm/Target/TargetData.h" 31#include "llvm/Transforms/Utils/PromoteMemToReg.h" 32#include "llvm/Support/Debug.h" 33#include "llvm/ADT/Statistic.h" 34#include "llvm/ADT/StringExtras.h" 35using namespace llvm; 36 37namespace { 38 Statistic<> NumReplaced("scalarrepl", "Number of allocas broken up"); 39 Statistic<> NumPromoted("scalarrepl", "Number of allocas promoted"); 40 41 struct SROA : public FunctionPass { 42 bool runOnFunction(Function &F); 43 44 bool performScalarRepl(Function &F); 45 bool performPromotion(Function &F); 46 47 // getAnalysisUsage - This pass does not require any passes, but we know it 48 // will not alter the CFG, so say so. 49 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 50 AU.addRequired<DominatorTree>(); 51 AU.addRequired<DominanceFrontier>(); 52 AU.addRequired<TargetData>(); 53 AU.setPreservesCFG(); 54 } 55 56 private: 57 int isSafeElementUse(Value *Ptr); 58 int isSafeUseOfAllocation(Instruction *User); 59 int isSafeAllocaToScalarRepl(AllocationInst *AI); 60 void CanonicalizeAllocaUsers(AllocationInst *AI); 61 AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base); 62 }; 63 64 RegisterOpt<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); 65} 66 67// Public interface to the ScalarReplAggregates pass 68FunctionPass *llvm::createScalarReplAggregatesPass() { return new SROA(); } 69 70 71bool SROA::runOnFunction(Function &F) { 72 bool Changed = performPromotion(F); 73 while (1) { 74 bool LocalChange = performScalarRepl(F); 75 if (!LocalChange) break; // No need to repromote if no scalarrepl 76 Changed = true; 77 LocalChange = performPromotion(F); 78 if (!LocalChange) break; // No need to re-scalarrepl if no promotion 79 } 80 81 return Changed; 82} 83 84 85bool SROA::performPromotion(Function &F) { 86 std::vector<AllocaInst*> Allocas; 87 const TargetData &TD = getAnalysis<TargetData>(); 88 DominatorTree &DT = getAnalysis<DominatorTree>(); 89 DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); 90 91 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 92 93 bool Changed = false; 94 95 while (1) { 96 Allocas.clear(); 97 98 // Find allocas that are safe to promote, by looking at all instructions in 99 // the entry node 100 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 101 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 102 if (isAllocaPromotable(AI, TD)) 103 Allocas.push_back(AI); 104 105 if (Allocas.empty()) break; 106 107 PromoteMemToReg(Allocas, DT, DF, TD); 108 NumPromoted += Allocas.size(); 109 Changed = true; 110 } 111 112 return Changed; 113} 114 115 116// performScalarRepl - This algorithm is a simple worklist driven algorithm, 117// which runs on all of the malloc/alloca instructions in the function, removing 118// them if they are only used by getelementptr instructions. 119// 120bool SROA::performScalarRepl(Function &F) { 121 std::vector<AllocationInst*> WorkList; 122 123 // Scan the entry basic block, adding any alloca's and mallocs to the worklist 124 BasicBlock &BB = F.getEntryBlock(); 125 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 126 if (AllocationInst *A = dyn_cast<AllocationInst>(I)) 127 WorkList.push_back(A); 128 129 // Process the worklist 130 bool Changed = false; 131 while (!WorkList.empty()) { 132 AllocationInst *AI = WorkList.back(); 133 WorkList.pop_back(); 134 135 // We cannot transform the allocation instruction if it is an array 136 // allocation (allocations OF arrays are ok though), and an allocation of a 137 // scalar value cannot be decomposed at all. 138 // 139 if (AI->isArrayAllocation() || 140 (!isa<StructType>(AI->getAllocatedType()) && 141 !isa<ArrayType>(AI->getAllocatedType()))) continue; 142 143 // Check that all of the users of the allocation are capable of being 144 // transformed. 145 switch (isSafeAllocaToScalarRepl(AI)) { 146 default: assert(0 && "Unexpected value!"); 147 case 0: // Not safe to scalar replace. 148 continue; 149 case 1: // Safe, but requires cleanup/canonicalizations first 150 CanonicalizeAllocaUsers(AI); 151 case 3: // Safe to scalar replace. 152 break; 153 } 154 155 DEBUG(std::cerr << "Found inst to xform: " << *AI); 156 Changed = true; 157 158 std::vector<AllocaInst*> ElementAllocas; 159 if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 160 ElementAllocas.reserve(ST->getNumContainedTypes()); 161 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 162 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 163 AI->getName() + "." + utostr(i), AI); 164 ElementAllocas.push_back(NA); 165 WorkList.push_back(NA); // Add to worklist for recursive processing 166 } 167 } else { 168 const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 169 ElementAllocas.reserve(AT->getNumElements()); 170 const Type *ElTy = AT->getElementType(); 171 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 172 AllocaInst *NA = new AllocaInst(ElTy, 0, 173 AI->getName() + "." + utostr(i), AI); 174 ElementAllocas.push_back(NA); 175 WorkList.push_back(NA); // Add to worklist for recursive processing 176 } 177 } 178 179 // Now that we have created the alloca instructions that we want to use, 180 // expand the getelementptr instructions to use them. 181 // 182 while (!AI->use_empty()) { 183 Instruction *User = cast<Instruction>(AI->use_back()); 184 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); 185 // We now know that the GEP is of the form: GEP <ptr>, 0, <cst> 186 unsigned Idx = 187 (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getRawValue(); 188 189 assert(Idx < ElementAllocas.size() && "Index out of range?"); 190 AllocaInst *AllocaToUse = ElementAllocas[Idx]; 191 192 Value *RepValue; 193 if (GEPI->getNumOperands() == 3) { 194 // Do not insert a new getelementptr instruction with zero indices, only 195 // to have it optimized out later. 196 RepValue = AllocaToUse; 197 } else { 198 // We are indexing deeply into the structure, so we still need a 199 // getelement ptr instruction to finish the indexing. This may be 200 // expanded itself once the worklist is rerun. 201 // 202 std::string OldName = GEPI->getName(); // Steal the old name. 203 std::vector<Value*> NewArgs; 204 NewArgs.push_back(Constant::getNullValue(Type::IntTy)); 205 NewArgs.insert(NewArgs.end(), GEPI->op_begin()+3, GEPI->op_end()); 206 GEPI->setName(""); 207 RepValue = new GetElementPtrInst(AllocaToUse, NewArgs, OldName, GEPI); 208 } 209 210 // Move all of the users over to the new GEP. 211 GEPI->replaceAllUsesWith(RepValue); 212 // Delete the old GEP 213 GEPI->eraseFromParent(); 214 } 215 216 // Finally, delete the Alloca instruction 217 AI->getParent()->getInstList().erase(AI); 218 NumReplaced++; 219 } 220 221 return Changed; 222} 223 224 225/// isSafeElementUse - Check to see if this use is an allowed use for a 226/// getelementptr instruction of an array aggregate allocation. 227/// 228int SROA::isSafeElementUse(Value *Ptr) { 229 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 230 I != E; ++I) { 231 Instruction *User = cast<Instruction>(*I); 232 switch (User->getOpcode()) { 233 case Instruction::Load: break; 234 case Instruction::Store: 235 // Store is ok if storing INTO the pointer, not storing the pointer 236 if (User->getOperand(0) == Ptr) return 0; 237 break; 238 case Instruction::GetElementPtr: { 239 GetElementPtrInst *GEP = cast<GetElementPtrInst>(User); 240 if (GEP->getNumOperands() > 1) { 241 if (!isa<Constant>(GEP->getOperand(1)) || 242 !cast<Constant>(GEP->getOperand(1))->isNullValue()) 243 return 0; // Using pointer arithmetic to navigate the array... 244 } 245 if (!isSafeElementUse(GEP)) return 0; 246 break; 247 } 248 default: 249 DEBUG(std::cerr << " Transformation preventing inst: " << *User); 250 return 0; 251 } 252 } 253 return 3; // All users look ok :) 254} 255 256/// AllUsersAreLoads - Return true if all users of this value are loads. 257static bool AllUsersAreLoads(Value *Ptr) { 258 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 259 I != E; ++I) 260 if (cast<Instruction>(*I)->getOpcode() != Instruction::Load) 261 return false; 262 return true; 263} 264 265/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an 266/// aggregate allocation. 267/// 268int SROA::isSafeUseOfAllocation(Instruction *User) { 269 if (!isa<GetElementPtrInst>(User)) return 0; 270 271 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); 272 gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); 273 274 // The GEP is safe to transform if it is of the form GEP <ptr>, 0, <cst> 275 if (I == E || 276 I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) 277 return 0; 278 279 ++I; 280 if (I == E) return 0; // ran out of GEP indices?? 281 282 // If this is a use of an array allocation, do a bit more checking for sanity. 283 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 284 uint64_t NumElements = AT->getNumElements(); 285 286 if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) { 287 // Check to make sure that index falls within the array. If not, 288 // something funny is going on, so we won't do the optimization. 289 // 290 if (cast<ConstantInt>(GEPI->getOperand(2))->getRawValue() >= NumElements) 291 return 0; 292 293 } else { 294 // If this is an array index and the index is not constant, we cannot 295 // promote... that is unless the array has exactly one or two elements in 296 // it, in which case we CAN promote it, but we have to canonicalize this 297 // out if this is the only problem. 298 if (NumElements == 1 || NumElements == 2) 299 return AllUsersAreLoads(GEPI) ? 1 : 0; // Canonicalization required! 300 return 0; 301 } 302 } 303 304 // If there are any non-simple uses of this getelementptr, make sure to reject 305 // them. 306 return isSafeElementUse(GEPI); 307} 308 309/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of 310/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, 311/// or 1 if safe after canonicalization has been performed. 312/// 313int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { 314 // Loop over the use list of the alloca. We can only transform it if all of 315 // the users are safe to transform. 316 // 317 int isSafe = 3; 318 for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); 319 I != E; ++I) { 320 isSafe &= isSafeUseOfAllocation(cast<Instruction>(*I)); 321 if (isSafe == 0) { 322 DEBUG(std::cerr << "Cannot transform: " << *AI << " due to user: " 323 << **I); 324 return 0; 325 } 326 } 327 // If we require cleanup, isSafe is now 1, otherwise it is 3. 328 return isSafe; 329} 330 331/// CanonicalizeAllocaUsers - If SROA reported that it can promote the specified 332/// allocation, but only if cleaned up, perform the cleanups required. 333void SROA::CanonicalizeAllocaUsers(AllocationInst *AI) { 334 // At this point, we know that the end result will be SROA'd and promoted, so 335 // we can insert ugly code if required so long as sroa+mem2reg will clean it 336 // up. 337 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 338 UI != E; ) { 339 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(*UI++); 340 gep_type_iterator I = gep_type_begin(GEPI); 341 ++I; 342 343 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 344 uint64_t NumElements = AT->getNumElements(); 345 346 if (!isa<ConstantInt>(I.getOperand())) { 347 if (NumElements == 1) { 348 GEPI->setOperand(2, Constant::getNullValue(Type::IntTy)); 349 } else { 350 assert(NumElements == 2 && "Unhandled case!"); 351 // All users of the GEP must be loads. At each use of the GEP, insert 352 // two loads of the appropriate indexed GEP and select between them. 353 Value *IsOne = BinaryOperator::createSetNE(I.getOperand(), 354 Constant::getNullValue(I.getOperand()->getType()), 355 "isone", GEPI); 356 // Insert the new GEP instructions, which are properly indexed. 357 std::vector<Value*> Indices(GEPI->op_begin()+1, GEPI->op_end()); 358 Indices[1] = Constant::getNullValue(Type::IntTy); 359 Value *ZeroIdx = new GetElementPtrInst(GEPI->getOperand(0), Indices, 360 GEPI->getName()+".0", GEPI); 361 Indices[1] = ConstantInt::get(Type::IntTy, 1); 362 Value *OneIdx = new GetElementPtrInst(GEPI->getOperand(0), Indices, 363 GEPI->getName()+".1", GEPI); 364 // Replace all loads of the variable index GEP with loads from both 365 // indexes and a select. 366 while (!GEPI->use_empty()) { 367 LoadInst *LI = cast<LoadInst>(GEPI->use_back()); 368 Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); 369 Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); 370 Value *R = new SelectInst(IsOne, One, Zero, LI->getName(), LI); 371 LI->replaceAllUsesWith(R); 372 LI->eraseFromParent(); 373 } 374 GEPI->eraseFromParent(); 375 } 376 } 377 } 378 } 379} 380