ScalarReplAggregates.cpp revision b742defa0a8f3e477c3ed641da49aab276937556
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA, then Mem2Reg until we run out of things to promote works well.
//
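// As an illustrative sketch (hypothetical IR, not from a test case), SROA
// followed by Mem2Reg turns
//
//   %p = alloca { i32, i32 }
//   %f1 = getelementptr { i32, i32 }* %p, i32 0, i32 1
//   store i32 2, i32* %f1
//   %v = load i32* %f1
//
// first into two scalar allocas (%p.0 and %p.1), and then into plain SSA
// values, leaving %v as the constant 2.
//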
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals, "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    explicit SROA(signed T = -1) : FunctionPass(&ID) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

    // getAnalysisUsage - This pass requires dominator information, and we know
    // it will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<DominanceFrontier>();
      AU.setPreservesCFG();
    }

  private:
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// needsCleanup - This is set to true if there is some use of the alloca
      /// that requires cleanup.
      bool needsCleanup : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      AllocaInfo()
        : isUnsafe(false), needsCleanup(false),
          isMemCpySrc(false), isMemCpyDst(false) {}
    };

    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }

    int isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                             uint64_t ArrayOffset, AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
                   uint64_t &ArrayOffset, AllocaInfo &Info);
    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset,
                         uint64_t MemSize, const Type *MemOpType, bool isStore,
                         AllocaInfo &Info);
    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
    unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();
    void CleanupGEP(GetElementPtrInst *GEP);
    void CleanupAllocaUsers(Value *V);
    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);

    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                            bool &SawVec, uint64_t Offset, unsigned AllocaSize);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                      uint64_t Offset, IRBuilder<> &Builder);
    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                     uint64_t Offset, IRBuilder<> &Builder);
    static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
  };
}

char SROA::ID = 0;
static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
  return new SROA(Threshold);
}


bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}


bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  DominatorTree &DT = getAnalysis<DominatorTree>();
  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function

  bool Changed = false;

  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    PromoteMemToReg(Allocas, DT, DF);
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}

/// getNumSAElements - Return the number of elements in the specified struct or
/// array.
static uint64_t getNumSAElements(const Type *T) {
  if (const StructType *ST = dyn_cast<StructType>(T))
    return ST->getNumElements();
  return cast<ArrayType>(T)->getNumElements();
}

// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
// if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocaInst*> WorkList;

  // Scan the entry basic block, adding any allocas to the worklist.
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
      WorkList.push_back(A);

  // Process the worklist
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
      continue;

    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
      DEBUG(errs() << "  memcpy = " << *TheCopy << '\n');
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

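    // As an illustrative sketch (hypothetical IR), an alloca that is only
    // initialized by such a copy and then read, e.g.
    //
    //   %A = alloca [3 x i32]
    //   %p = bitcast [3 x i32]* %A to i8*
    //   call void @llvm.memcpy.i32(i8* %p, i8* bitcast ([3 x i32]* @C to i8*),
    //                              i32 12, i32 4)
    //
    // where @C is a constant global, has every use rewritten to refer to @C
    // directly, and both the memcpy and the alloca are deleted.
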
    // Check to see if we can perform the core SROA transformation.  We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());

    // Do not promote [0 x %struct].
    if (AllocaSize == 0) continue;

    // Do not promote any struct whose size is too big.
    if (AllocaSize > SRThreshold) continue;

    if ((isa<StructType>(AI->getAllocatedType()) ||
         isa<ArrayType>(AI->getAllocatedType())) &&
        // Do not promote any struct into more than SRThreshold/4 (by default
        // 32) separate vars.
        getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
      // Check that all of the users of the allocation are capable of being
      // transformed.
      switch (isSafeAllocaToScalarRepl(AI)) {
      default: llvm_unreachable("Unexpected value!");
      case 0:  // Not safe to scalar replace.
        break;
      case 1:  // Safe, but requires cleanup/canonicalizations first
        CleanupAllocaUsers(AI);
        // FALL THROUGH.
      case 3:  // Safe to scalar replace.
        DoScalarReplacement(AI, WorkList);
        Changed = true;
        continue;
      }
    }

    // Check if we can turn this aggregate value (potentially with casts) into
    // a simple scalar value that can be mem2reg'd into a register value.
    // IsNotTrivial tracks whether this is something that mem2reg could have
    // promoted itself.  If so, we don't want to transform it needlessly.  Note
    // that we can't just check based on the type: the alloca may be of an i32
    // but that has pointer arithmetic to set byte 3 of it or something.
    bool IsNotTrivial = false;
    const Type *VectorTy = 0;
    bool HadAVector = false;
    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
                           0, unsigned(AllocaSize)) && IsNotTrivial) {
      AllocaInst *NewAI;
      // If we were able to find a vector type that can handle this with
      // insert/extract elements, and if there was at least one use that had
      // a vector type, promote this to a vector.  We don't want to promote
      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
      // we just get a lot of insert/extracts.  If at least one vector is
      // involved, then we probably really do have a union of vector/array.
      if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
        DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                     << *VectorTy << '\n');

        // Create and insert the vector alloca.
        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      } else {
        DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");

        // Create and insert the integer alloca.
        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      }
      NewAI->takeName(AI);
      AI->eraseFromParent();
      ++NumConverted;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this alloca.
  }

  return Changed;
}

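// As an illustrative sketch (hypothetical IR), the conversion path handles an
// aggregate whose parts are accessed through casts:
//
//   %X = alloca { i32, i32 }
//   %P = bitcast { i32, i32 }* %X to i64*
//   store i64 %v, i64* %P
//   %f = getelementptr { i32, i32 }* %X, i32 0, i32 1
//   %hi = load i32* %f
//
// becomes a single "alloca i64" whose accesses are expressed with shifts and
// truncs, which a following mem2reg run can then promote to SSA form.
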
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
void SROA::DoScalarReplacement(AllocaInst *AI,
                               std::vector<AllocaInst*> &WorkList) {
  DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the new alloca instructions, rewrite all the
  // uses of the old alloca.
  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);

  // Now erase any instructions that were made dead while rewriting the alloca.
  DeleteDeadInstructions();
  AI->eraseFromParent();

  NumReplaced++;
}

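// For example (an illustrative sketch), an "alloca { i32, float }" named
// %agg is replaced by the element allocas
//
//   %agg.0 = alloca i32
//   %agg.1 = alloca float
//
// and RewriteForScalarRepl below redirects every GEP, load, store, and mem
// intrinsic that touched %agg to the appropriate element alloca.
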
/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
/// recursively including all their operands that become trivially dead.
void SROA::DeleteDeadInstructions() {
  while (!DeadInsts.empty()) {
    Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        // Zero out the operand and see if it becomes trivially dead.
        // (But, don't add allocas to the dead instruction list -- they are
        // already on the worklist and will be deleted separately.)
        *OI = 0;
        if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
  }
}

/// AllUsersAreLoads - Return true if all users of this value are loads.
static bool AllUsersAreLoads(Value *Ptr) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I)
    if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
      return false;
  return true;
}

/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
/// performing scalar replacement of alloca AI.  The results are flagged in
/// the Info parameter.  Offset and ArrayOffset indicate the position within
/// AI that is referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                               uint64_t ArrayOffset, AllocaInfo &Info) {
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
      isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      uint64_t GEPArrayOffset = ArrayOffset;
      uint64_t GEPOffset = Offset;
      isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info);
      if (!Info.isUnsafe)
        isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
      if (Length)
        isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0,
                        UI.getOperandNo() == 1, Info);
      else
        MarkUnsafe(Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      if (!LI->isVolatile()) {
        const Type *LIType = LI->getType();
        isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType),
                        LIType, false, Info);
      } else
        MarkUnsafe(Info);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Store is ok if storing INTO the pointer, not storing the pointer
      if (!SI->isVolatile() && SI->getOperand(0) != I) {
        const Type *SIType = SI->getOperand(0)->getType();
        isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType),
                        SIType, true, Info);
      } else
        MarkUnsafe(Info);
    } else if (isa<DbgInfoIntrinsic>(UI)) {
      // If one user is DbgInfoIntrinsic then check if all users are
      // DbgInfoIntrinsics.
      if (OnlyUsedByDbgInfoIntrinsics(I)) {
        Info.needsCleanup = true;
        return;
      }
      MarkUnsafe(Info);
    } else {
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      MarkUnsafe(Info);
    }
    if (Info.isUnsafe) return;
  }
}

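// For instance (an illustrative sketch), an alloca whose address escapes is
// rejected here:
//
//   %A = alloca { i32, i32 }
//   %c = bitcast { i32, i32 }* %A to i8*
//   store i8* %c, i8** %Q
//
// The store records the pointer itself rather than storing into it, so the
// alloca is marked unsafe and left alone.
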
/// isSafeGEP - Check if a GEP instruction can be handled for scalar
/// replacement.  It is safe when all the indices are constant, in-bounds
/// references, and when the resulting offset corresponds to an element within
/// the alloca type.  The results are flagged in the Info parameter.  Upon
/// return, Offset is adjusted as specified by the GEP indices.  For the
/// special case of a variable index to a 2-element array, ArrayOffset is set
/// to the array element size.
void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
                     uint64_t &Offset, uint64_t &ArrayOffset,
                     AllocaInfo &Info) {
  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
  if (GEPIt == E)
    return;

  // The first GEP index must be zero.
  if (!isa<ConstantInt>(GEPIt.getOperand()) ||
      !cast<ConstantInt>(GEPIt.getOperand())->isZero())
    return MarkUnsafe(Info);
  if (++GEPIt == E)
    return;

  // If the first index is a non-constant index into an array, see if we can
  // handle it as a special case.
  const Type *ArrayEltTy = 0;
  if (ArrayOffset == 0 && Offset == 0) {
    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
      if (!isa<ConstantInt>(GEPIt.getOperand())) {
        uint64_t NumElements = AT->getNumElements();

        // If this is an array index and the index is not constant, we cannot
        // promote... that is unless the array has exactly one or two elements
        // in it, in which case we CAN promote it, but we have to canonicalize
        // this out if this is the only problem.
        if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI))
          return MarkUnsafe(Info);
        Info.needsCleanup = true;
        ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType());
        ArrayEltTy = AT->getElementType();
        ++GEPIt;
      }
    }
  }

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; GEPIt != E; ++GEPIt) {
    // Ignore struct elements, no extra checking needed for these.
    if (isa<StructType>(*GEPIt))
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
    if (!IdxVal)
      return MarkUnsafe(Info);

    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
      // This GEP indexes an array.  Verify that this is an in-range constant
      // integer.  Specifically, consider A[0][i].  We cannot know that the user
      // isn't doing invalid things like allowing i to index an out-of-range
      // subscript that accesses A[1].  Because of this, we have to reject SROA
      // of any accesses into structs where any of the components are variables.
      if (IdxVal->getZExtValue() >= AT->getNumElements())
        return MarkUnsafe(Info);
    } else {
      const VectorType *VT = dyn_cast<VectorType>(*GEPIt);
      assert(VT && "unexpected type in GEP type iterator");
      if (IdxVal->getZExtValue() >= VT->getNumElements())
        return MarkUnsafe(Info);
    }
  }

  // All the indices are safe.  Now compute the offset due to this GEP and
  // check if the alloca has a component element at that offset.
  if (ArrayOffset == 0) {
    SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
    Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                   &Indices[0], Indices.size());
  } else {
    // Both array elements have the same type, so it suffices to check one of
    // them.  Copy the GEP indices starting from the array index, but replace
    // that variable index with a constant zero.
    SmallVector<Value*, 8> Indices(GEPI->op_begin() + 2, GEPI->op_end());
    Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
    const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy);
    Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size());
  }
  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
    MarkUnsafe(Info);
}

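// To illustrate the special case (hypothetical IR): a variable index into a
// 2-element array, as in
//
//   %gep = getelementptr [2 x i32]* %A, i32 0, i32 %i
//   %v   = load i32* %gep
//
// is tolerated as long as all users of the GEP are loads; CleanupGEP below
// canonicalizes it into loads of both elements plus a select on %i, after
// which the array can still be split.
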
/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
/// alloca or has an offset and size that corresponds to a component element
/// within it.  The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset,
                           uint64_t ArrayOffset, uint64_t MemSize,
                           const Type *MemOpType, bool isStore,
                           AllocaInfo &Info) {
  // Check if this is a load/store of the entire alloca.
  if (Offset == 0 && ArrayOffset == 0 &&
      MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
    // (which are essentially the same as the MemIntrinsics, especially with
    // regard to copying padding between elements), or references using the
    // aggregate type of the alloca.
    if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
      if (!UsesAggregateType) {
        if (isStore)
          Info.isMemCpyDst = true;
        else
          Info.isMemCpySrc = true;
      }
      return;
    }
  }
  // Check if the offset/size correspond to a component within the alloca type.
  const Type *T = AI->getAllocatedType();
  if (TypeHasComponent(T, Offset, MemSize) &&
      (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize)))
    return;

  return MarkUnsafe(Info);
}

/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size.  If Size is zero, do not check the size.
bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
  const Type *EltTy;
  uint64_t EltSize;
  if (const StructType *ST = dyn_cast<StructType>(T)) {
    const StructLayout *Layout = TD->getStructLayout(ST);
    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
    EltTy = ST->getContainedType(EltIdx);
    EltSize = TD->getTypeAllocSize(EltTy);
    Offset -= Layout->getElementOffset(EltIdx);
  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
    EltTy = AT->getElementType();
    EltSize = TD->getTypeAllocSize(EltTy);
    Offset %= EltSize;
  } else {
    return false;
  }
  if (Offset == 0 && (Size == 0 || EltSize == Size))
    return true;
  // Check if the component spans multiple elements.
  if (Offset + Size > EltSize)
    return false;
  return TypeHasComponent(EltTy, Offset, Size);
}

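// A worked case (illustrative, assuming a 32-bit layout): for
// T = { i32, [2 x i16] }, Offset = 6 and Size = 2, the struct layout places
// the array at byte 4; recursing into [2 x i16] reduces the offset to 0 with
// an element size of 2 == Size, so TypeHasComponent returns true.  Offset = 5
// with the same size would land mid-element and return false.
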
/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
/// the instruction I, which references it, to use the separate elements.
/// Offset indicates the position within AI that is referenced by this
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                                SmallVector<AllocaInst*, 32> &NewElts) {
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
      RewriteBitCast(BC, AI, Offset, NewElts);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      RewriteGEP(GEPI, AI, Offset, NewElts);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
      uint64_t MemSize = Length->getZExtValue();
      if (Offset == 0 &&
          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      const Type *LIType = LI->getType();
      if (LIType == AI->getAllocatedType()) {
        // Replace:
        //   %res = load { i32, i32 }* %alloc
        // with:
        //   %load.0 = load i32* %alloc.0
        //   %insert.0 = insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
        //   %load.1 = load i32* %alloc.1
        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
        // (Also works for arrays instead of structs)
        Value *Insert = UndefValue::get(LIType);
        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
          Value *Load = new LoadInst(NewElts[i], "load", LI);
          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
        }
        LI->replaceAllUsesWith(Insert);
        DeadInsts.push_back(LI);
      } else if (isa<IntegerType>(LIType) &&
                 TD->getTypeAllocSize(LIType) ==
                 TD->getTypeAllocSize(AI->getAllocatedType())) {
        // If this is a load of the entire alloca to an integer, rewrite it.
        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      Value *Val = SI->getOperand(0);
      const Type *SIType = Val->getType();
      if (SIType == AI->getAllocatedType()) {
        // Replace:
        //   store { i32, i32 } %val, { i32, i32 }* %alloc
        // with:
        //   %val.0 = extractvalue { i32, i32 } %val, 0
        //   store i32 %val.0, i32* %alloc.0
        //   %val.1 = extractvalue { i32, i32 } %val, 1
        //   store i32 %val.1, i32* %alloc.1
        // (Also works for arrays instead of structs)
        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
          new StoreInst(Extract, NewElts[i], SI);
        }
        DeadInsts.push_back(SI);
      } else if (isa<IntegerType>(SIType) &&
                 TD->getTypeAllocSize(SIType) ==
                 TD->getTypeAllocSize(AI->getAllocatedType())) {
        // If this is a store of the entire alloca from an integer, rewrite it.
        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
      }
    }
  }
}

/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
/// and recursively continue updating all of its uses.
void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                          SmallVector<AllocaInst*, 32> &NewElts) {
  RewriteForScalarRepl(BC, AI, Offset, NewElts);
  if (BC->getOperand(0) != AI)
    return;

  // The bitcast references the original alloca.  Replace its uses with
  // references to the first new element alloca.
  Instruction *Val = NewElts[0];
  if (Val->getType() != BC->getDestTy()) {
    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
    Val->takeName(BC);
  }
  BC->replaceAllUsesWith(Val);
  DeadInsts.push_back(BC);
}

/// FindElementAndOffset - Return the index of the element containing Offset
/// within the specified type, which must be either a struct or an array.
/// Sets T to the type of the element and Offset to the offset within that
/// element.
unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) {
  unsigned Idx = 0;
  if (const StructType *ST = dyn_cast<StructType>(T)) {
    const StructLayout *Layout = TD->getStructLayout(ST);
    Idx = Layout->getElementContainingOffset(Offset);
    T = ST->getContainedType(Idx);
    Offset -= Layout->getElementOffset(Idx);
  } else {
    const ArrayType *AT = dyn_cast<ArrayType>(T);
    assert(AT && "unexpected type for scalar replacement");
    T = AT->getElementType();
    uint64_t EltSize = TD->getTypeAllocSize(T);
    Idx = (unsigned)(Offset / EltSize);
    Offset -= Idx * EltSize;
  }
  return Idx;
}

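// A small worked example (illustrative): with T = [4 x i16] and Offset = 5,
// EltSize is 2, so the function returns Idx = 2, sets T to i16, and reduces
// Offset to 1 -- the byte within element 2 that the original offset named.
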
/// RewriteGEP - Check if this GEP instruction moves the pointer across
/// elements of the alloca that are being split apart, and if so, rewrite
/// the GEP to be relative to the new element.
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                      SmallVector<AllocaInst*, 32> &NewElts) {
  uint64_t OldOffset = Offset;
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                 &Indices[0], Indices.size());

  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);

  const Type *T = AI->getAllocatedType();
  unsigned OldIdx = FindElementAndOffset(T, OldOffset);
  if (GEPI->getOperand(0) == AI)
    OldIdx = ~0U; // Force the GEP to be rewritten.

  T = AI->getAllocatedType();
  uint64_t EltOffset = Offset;
  unsigned Idx = FindElementAndOffset(T, EltOffset);

  // If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
  if (Idx == OldIdx)
    return;

  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
  SmallVector<Value*, 8> NewArgs;
  NewArgs.push_back(Constant::getNullValue(i32Ty));
  while (EltOffset != 0) {
    unsigned EltIdx = FindElementAndOffset(T, EltOffset);
    NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx));
  }
  Instruction *Val = NewElts[Idx];
  if (NewArgs.size() > 1) {
    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
                                            NewArgs.end(), "", GEPI);
    Val->takeName(GEPI);
  }
  if (Val->getType() != GEPI->getType())
    Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI);
  GEPI->replaceAllUsesWith(Val);
  DeadInsts.push_back(GEPI);
}

/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                        AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type.  The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting.
  // For memset, this Value* stays null.
  Value *OtherPtr = 0;
  LLVMContext &Context = MI->getContext();
  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
    if (Inst == MTI->getRawDest())
      OtherPtr = MTI->getRawSource();
    else {
      assert(Inst == MTI->getRawSource());
      OtherPtr = MTI->getRawDest();
    }
  }

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it safely.
  if (OtherPtr) {

    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
    // optimization, but it's also required to detect the corner case where
    // both pointer operands are referencing the same memory, and where
    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
    // (This function is only called for mem intrinsics that access the whole
    // aggregate, so non-zero GEPs are not an issue here.)
    while (1) {
      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
        OtherPtr = BC->getOperand(0);
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
        // All zero GEPs are effectively bitcasts.
        if (GEP->hasAllZeroIndices()) {
          OtherPtr = GEP->getOperand(0);
          continue;
        }
      }
      break;
    }
    // If OtherPtr has already been rewritten, this intrinsic will be dead.
    if (OtherPtr == NewElts[0])
      return;

    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
      if (BCE->getOpcode() == Instruction::BitCast)
        OtherPtr = BCE->getOperand(0);

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    if (OtherPtr->getType() != AI->getType())
      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
                                 MI);
  }

  // Process each element of the aggregate.
  Value *TheFn = MI->getOperand(0);
  const Type *BytePtrTy = MI->getRawDest()->getType();
  bool SROADest = MI->getRawDest() == Inst;

  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = 0;
    unsigned OtherEltAlign = MemAlignment;

    if (OtherPtr == AI) {
      OtherElt = NewElts[i];
      OtherEltAlign = 0;
    } else if (OtherPtr) {
      Value *Idx[2] = { Zero,
                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
                                           OtherPtr->getNameStr()+"."+Twine(i),
                                                   MI);
      uint64_t EltOffset;
      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
      if (const StructType *ST =
            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
      } else {
        const Type *EltTy =
          cast<SequentialType>(OtherPtr->getType())->getElementType();
        EltOffset = TD->getTypeAllocSize(EltTy)*i;
      }

      // The alignment of the other pointer is the guaranteed alignment of the
      // element, which is affected by both the known alignment of the whole
      // mem intrinsic and the alignment of the element.  If the alignment of
      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
      // known alignment is just 4 bytes.
      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
    }

    Value *EltPtr = NewElts[i];
    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
    if (EltTy->isSingleValueType()) {
      if (isa<MemTransferInst>(MI)) {
        if (SROADest) {
          // From Other to Alloca.
          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
          new StoreInst(Elt, EltPtr, MI);
        } else {
          // From Alloca to Other.
          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
        }
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          const Type *ValTy = EltTy->getScalarType();

          // Construct an integer with the right value.
          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          // Set each byte.
          for (unsigned i = 0; 8*i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(Context, TotalVal);
          if (isa<PointerType>(ValTy))
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPoint())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy != ValTy) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
            StoreVal = ConstantVector::get(&Elts[0], NumElts);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    // Cast the element pointer to BytePtrTy.
    if (EltPtr->getType() != BytePtrTy)
      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);

    // Cast the other pointer (if we have one) to BytePtrTy.
    if (OtherElt && OtherElt->getType() != BytePtrTy)
      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getNameStr(),
                                 MI);

    unsigned EltSize = TD->getTypeAllocSize(EltTy);

    // Finally, insert the meminst for this element.
    if (isa<MemTransferInst>(MI)) {
      Value *Ops[] = {
        SROADest ? EltPtr : OtherElt,  // Dest ptr
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        // Align
        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    } else {
      assert(isa<MemSetInst>(MI));
      Value *Ops[] = {
        EltPtr, MI->getOperand(2),  // Dest, Value,
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        Zero  // Align
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    }
  }
  DeadInsts.push_back(MI);
}

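// As a sketch (hypothetical IR), copying into an alloca of { i32, i32 } that
// has been split into %alloc.0 and %alloc.1:
//
//   call void @llvm.memcpy.i32(i8* %dst, i8* %src, i32 8, i32 4)
//
// becomes, for each element, a GEP to the corresponding field of the other
// pointer plus a scalar load/store pair, with each element's alignment
// reduced to whatever its offset within the aggregate guarantees.
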
/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation.  Extract out the pieces of the stored
/// integer and store them individually.
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts){
  // Extract each element out of the integer according to its structure offset
  // and store the element value to the individual alloca.
  Value *SrcVal = SI->getOperand(0);
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  // Handle tail padding by extending the operand
  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    SrcVal = new ZExtInst(SrcVal,
                          IntegerType::get(SI->getContext(), AllocaSizeBits),
                          "", SI);

  DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    const StructLayout *Layout = TD->getStructLayout(EltSTy);

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Get the number of bits to shift SrcVal to get the value.
      const Type *FieldTy = EltSTy->getElementType(i);
      uint64_t Shift = Layout->getElementOffsetInBits(i);

      if (TD->isBigEndian())
        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

      // Ignore zero sized fields like {}, they obviously contain no data.
      if (FieldSizeBits == 0) continue;

      if (FieldSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                             IntegerType::get(SI->getContext(), FieldSizeBits),
                               "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == FieldTy) {
        // Storing to an integer field of this size, just do it.
      } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);
    }

  } else {
    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
    const Type *ArrayEltTy = ATy->getElementType();
    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);

    uint64_t Shift;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-ElementOffset;
    else
      Shift = 0;

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Ignore zero sized fields like {}, they obviously contain no data.
      if (ElementSizeBits == 0) continue;

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      if (ElementSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(SI->getContext(),
                                                ElementSizeBits), "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == ArrayEltTy) {
        // Storing to an integer field of this size, just do it.
      } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);

      if (TD->isBigEndian())
        Shift -= ElementOffset;
      else
        Shift += ElementOffset;
    }
  }

  DeadInsts.push_back(SI);
}

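// For example (an illustrative sketch, assuming a little-endian target),
// storing i64 %v over a split { i32, i32 } becomes
//
//   %lo = trunc i64 %v to i32
//   store i32 %lo, i32* %alloc.0
//   %sh = lshr i64 %v, 32
//   %hi = trunc i64 %sh to i32
//   store i32 %hi, i32* %alloc.1
//
// On a big-endian target the shift amounts are mirrored, as computed above.
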
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer.  Load the individual pieces to form the aggregate value.
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the NewElts according to its structure offset
  // and form the result value.
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
  }

  Value *ResultVal =
    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca.  If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    const Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero sized fields like {}, they obviously contain no data.
    if (FieldSizeBits == 0) continue;

    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
                                                     FieldSizeBits);
    if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
        !isa<VectorType>(FieldTy))
      SrcField = new BitCastInst(SrcField,
                                 PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is a fp or vector of the right size but that isn't an
    // integer type, bitcast to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else // Array case.
      Shift = i*ArrayEltBitOffset;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
  }

  // Handle tail padding by truncating the result
  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);

  LI->replaceAllUsesWith(ResultVal);
  DeadInsts.push_back(LI);
}

/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    unsigned PrevFieldBitOffset = 0;
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

      // Padding in sub-elements?
      if (HasPadding(STy->getElementType(i), TD))
        return true;

      // Check to see if there is any padding between this element and the
      // previous one.
      if (i) {
        unsigned PrevFieldEnd =
          PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
        if (PrevFieldEnd < FieldBitOffset)
          return true;
      }

      PrevFieldBitOffset = FieldBitOffset;
    }

    // Check for tail padding.
    if (unsigned EltCount = STy->getNumElements()) {
      unsigned PrevFieldEnd = PrevFieldBitOffset +
        TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
      if (PrevFieldEnd < SL->getSizeInBits())
        return true;
    }

  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    return HasPadding(ATy->getElementType(), TD);
  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    return HasPadding(VTy->getElementType(), TD);
  }
  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}

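// A concrete illustration (assuming a typical layout where i32 is 4-byte
// aligned): { i8, i32 } occupies 8 bytes but only 5 carry data, so the three
// bytes between the fields are padding and HasPadding returns true.  A memcpy
// into and back out of such a struct may be moving live data through that
// padding, which is why isSafeAllocaToScalarRepl below refuses to split an
// aggregate that is both a memcpy source and destination when it has padding.
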
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
/// aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
/// or 1 if safe after canonicalization has been performed.
int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info;

  isSafeForScalarRepl(AI, AI, 0, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(errs() << "Cannot transform: " << *AI << '\n');
    return 0;
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // struct.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), *TD))
    return 0;

  // If we require cleanup, return 1, otherwise return 3.
  return Info.needsCleanup ? 1 : 3;
}

/// CleanupGEP - The given GEP indexes an alloca, which can be promoted after
/// the GEP is canonicalized here.
void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
  gep_type_iterator I = gep_type_begin(GEPI);
  ++I;

  const ArrayType *AT = dyn_cast<ArrayType>(*I);
  if (!AT)
    return;

  uint64_t NumElements = AT->getNumElements();

  if (isa<ConstantInt>(I.getOperand()))
    return;

  if (NumElements == 1) {
    GEPI->setOperand(2,
                  Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));
    return;
  }

  assert(NumElements == 2 && "Unhandled case!");
  // All users of the GEP must be loads.  At each use of the GEP, insert
  // two loads of the appropriate indexed GEP and select between them.
  Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
                              Constant::getNullValue(I.getOperand()->getType()),
                              "isone");
  // Insert the new GEP instructions, which are properly indexed.
  SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
  Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
  Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0),
                                                     Indices.begin(),
                                                     Indices.end(),
                                                     GEPI->getName()+".0",GEPI);
  Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
  Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0),
                                                    Indices.begin(),
                                                    Indices.end(),
                                                    GEPI->getName()+".1", GEPI);
  // Replace all loads of the variable index GEP with loads from both
  // indexes and a select.
  while (!GEPI->use_empty()) {
    LoadInst *LI = cast<LoadInst>(GEPI->use_back());
    Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
    Value *One  = new LoadInst(OneIdx , LI->getName()+".1", LI);
    Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
    LI->replaceAllUsesWith(R);
    LI->eraseFromParent();
  }
}

/// CleanupAllocaUsers - If SROA reported that it can promote the specified
/// allocation, but only if cleaned up, perform the cleanups required.
void SROA::CleanupAllocaUsers(Value *V) {
  // At this point, we know that the end result will be SROA'd and promoted, so
  // we can insert ugly code if required so long as sroa+mem2reg will clean it
  // up.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ) {
    User *U = *UI++;
    if (isa<BitCastInst>(U)) {
      CleanupAllocaUsers(U);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      CleanupGEP(GEPI);
      CleanupAllocaUsers(GEPI);
      if (GEPI->use_empty()) GEPI->eraseFromParent();
    } else {
      Instruction *I = cast<Instruction>(U);
      SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
      if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
        // Safe to remove debug info uses.
        while (!DbgInUses.empty()) {
          DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
          DI->eraseFromParent();
        }
        I->eraseFromParent();
      }
    }
  }
}

/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
/// the offset specified by Offset (which is specified in bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.
static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
                        unsigned AllocaSize, const TargetData &TD,
                        LLVMContext &Context) {
  // If this could be contributing to a vector, analyze it.
  if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.

    // If the In type is a vector that is the same size as the alloca, see if
    // it matches the existing VecTy.
    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
        // If we're storing/loading a vector of the right size, allow it as a
        // vector.  If this is the first vector we see, remember the type so
        // that we know the element size.
        if (VecTy == 0)
          VecTy = VInTy;
        return;
      }
    } else if (In->isFloatTy() || In->isDoubleTy() ||
               (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
      // If we're accessing something that could be an element of a vector, see
      // if the implied vector agrees with what we already have and if Offset
      // is compatible with it.
      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
      if (Offset % EltSize == 0 &&
          AllocaSize % EltSize == 0 &&
          (VecTy == 0 ||
           cast<VectorType>(VecTy)->getElementType()
                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
        if (VecTy == 0)
          VecTy = VectorType::get(In, AllocaSize/EltSize);
        return;
      }
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  VecTy = Type::getVoidTy(Context);
}

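// For example (an illustrative sketch): a 16-byte alloca that sees a
// "store <4 x float>" at offset 0 and a "load float" at offset 8 keeps
// VecTy = <4 x float>, since the float access lines up with element 2 of the
// implied vector.  A "load i64" at offset 4, by contrast, fails the
// offset-divisibility check and collapses VecTy to void, selecting the
// big-integer fallback.
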
/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value as a vector type, set the
/// SawVec flag.
bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                              bool &SawVec, uint64_t Offset,
                              unsigned AllocaSize) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (LI->isVolatile())
        return false;
      MergeInType(LI->getType(), Offset, VecTy,
                  AllocaSize, *TD, V->getContext());
      SawVec |= isa<VectorType>(LI->getType());
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
      MergeInType(SI->getOperand(0)->getType(), Offset,
                  VecTy, AllocaSize, *TD, V->getContext());
      SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
                              AllocaSize))
        return false;
      IsNotTrivial = true;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
                                                &Indices[0], Indices.size());
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
                              AllocaSize))
        return false;
      IsNotTrivial = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value and constant size.
      if (isa<ConstantInt>(MSI->getValue()) &&
          isa<ConstantInt>(MSI->getLength())) {
        IsNotTrivial = true;
        continue;
      }
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
          IsNotTrivial = true;
          continue;
        }
    }

    // Ignore dbg intrinsic.
    if (isa<DbgInfoIntrinsic>(User))
      continue;

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}

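// To make the contract concrete (an illustrative sketch):
//
//   %X = alloca i32
//   %B = bitcast i32* %X to float*
//   store float %f, float* %B
//
// is accepted with IsNotTrivial set, since mem2reg alone cannot see through
// the bitcast; ConvertUsesToScalar below then rewrites the float access in
// terms of the single converted alloca.
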
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      // FIXME: Remove once builder has Twine API.
      Value *Old = Builder.CreateLoad(NewAI,
                                      (NewAI->getName()+".in").str().c_str());
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        // FIXME: Remove once builder has Twine API.
        Value *Old = Builder.CreateLoad(NewAI,
                                        (NewAI->getName()+".in").str().c_str());
        Value *New = ConvertScalar_InsertValue(
                         ConstantInt::get(User->getContext(), APVal),
                         Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination both refer to the same alloca, then
      // this is a no-op copy-to-self; just delete it.  Otherwise, emit a
      // load and store as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());

      if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a
        // store through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // No-op transfer: Src == Dst, nothing to emit.
      }

      MTI->eraseFromParent();
      continue;
    }

    // If the user is a dbg info intrinsic, it is safe to remove it.
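    // (llvm.dbg.declare/llvm.dbg.value only describe the memory for the
    // debugger; dropping them may degrade debug info but cannot change the
    // program's semantics.)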
    if (isa<DbgInfoIntrinsic>(User)) {
      User->eraseFromParent();
      continue;
    }

    llvm_unreachable("Unsupported operation!");
  }
}

/// ConvertScalar_ExtractValue - Extract a value of type ToType from an
/// integer or vector value FromVal, extracting the bits from the offset
/// specified by Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector
/// with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                                        uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  if (FromVal->getType() == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
    if (isa<VectorType>(ToType))
      return Builder.CreateBitCast(FromVal, ToType, "tmp");

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType, "tmp");
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                       Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
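    // Worked example: extracting an i16 at bit offset 0 from an i32 alloca
    // on a big-endian target gives ShAmt = 32 - 16 - 0 = 16, selecting the
    // two high-order bytes, which is where big-endian layout placed the
    // first two bytes of the aggregate.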
    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
            TD->getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: ShAmt can be negative or exceed the bit width; such shifts are not
  // defined in IR, so we range-check and use shl for negative amounts.  We
  // allow this to support (f.e.) loads off the end of a structure where only
  // some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                  ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                 -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
      Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                   LIBitWidth), "tmp");

  // If the result is an integer, this is a trunc or bitcast.
  if (isa<IntegerType>(ToType)) {
    // Should be done.
  } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise it must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing
/// integer or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
                                       uint64_t Offset, IRBuilder<> &Builder) {

  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  const Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType, "tmp");

    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());

    // Must be an element insertion.
    unsigned Elt = Offset/EltSize;

    if (SV->getType() != VTy->getElementType())
      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");

    SV = Builder.CreateInsertElement(Old, SV,
                    ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                     "tmp");
    return SV;
  }

  // If SV is a first-class aggregate value, insert each value recursively.
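  // E.g. (a sketch with invented types): storing a {i32, float} value is
  // decomposed into an extractvalue of each member followed by a recursive
  // insertion at that member's bit offset from the struct layout; the scalar
  // leaves then take the integer shift-and-or path below.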
  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
    SV = Builder.CreateBitCast(SV,
                           IntegerType::get(SV->getContext(),SrcWidth), "tmp");
  else if (isa<PointerType>(SV->getType()))
    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: ShAmt can be negative or exceed the bit width; such shifts are not
  // defined in IR, so we range-check and use lshr for negative amounts.  We
  // allow this to support (f.e.) stores off the end of a structure where
  // only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                                                ShAmt), "tmp");
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                                                 -ShAmt), "tmp");
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}



/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.
/// This intentionally only accepts constant expressions because we can't
/// rewrite arbitrary instructions.
static bool PointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return PointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if
/// we see any stores or other unknown uses.  If we see pointer arithmetic,
/// keep track of whether it moves the pointer (with isOffset) but otherwise
/// traverse the uses.  If we see a memcpy/memmove that targets an unoffseted
/// pointer to the alloca, and if the source pointer is a pointer to a
/// constant global, we can optimize this.
static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
                                           bool isOffset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
      // Ignore non-volatile loads, they are always ok.
      if (!LI->isVolatile())
        continue;

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If
      // it doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
                                         isOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    if (!isa<MemTransferInst>(*UI))
      return false;

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (isOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 1) return false;

    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!PointsToConstantGlobal(MI->getOperand(2)))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the
/// specified alloca is only modified by a copy from a constant global, and
/// null otherwise.  If we can prove this, we can replace any uses of the
/// alloca with uses of the global directly.
Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
  Instruction *TheCopy = 0;
  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
    return TheCopy;
  return 0;
}
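// Illustrative sketch (hypothetical IR; names and intrinsic signature are
// invented for the example) of the case the two helpers above detect:
//   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %A = alloca [4 x i32]
//   call void @llvm.memcpy(i8* %A.i8, i8* %G.i8, i64 16, i32 4)
//   ... only reads of %A follow ...
// Since %A is never written except by the single unoffseted memcpy from the
// constant global, every read of %A can be redirected to read @G directly,
// after which the alloca and the copy can be deleted.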