ScalarReplAggregates.cpp revision 6c95d24927b11fb76bc3e76ebe6a8198d74fc178
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because the
// two often interact, especially for C++ programs.  As such, iterating between
// SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    SROA(int T, bool hasDT, char &ID)
      : FunctionPass(ID), HasDomTree(hasDT) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

  private:
    bool HasDomTree;
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses. All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// The alloca to promote.
      AllocaInst *AI;

      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
92 bool isMemCpyDst : 1; 93 94 /// hasSubelementAccess - This is true if a subelement of the alloca is 95 /// ever accessed, or false if the alloca is only accessed with mem 96 /// intrinsics or load/store that only access the entire alloca at once. 97 bool hasSubelementAccess : 1; 98 99 /// hasALoadOrStore - This is true if there are any loads or stores to it. 100 /// The alloca may just be accessed with memcpy, for example, which would 101 /// not set this. 102 bool hasALoadOrStore : 1; 103 104 explicit AllocaInfo(AllocaInst *ai) 105 : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false), 106 hasSubelementAccess(false), hasALoadOrStore(false) {} 107 }; 108 109 unsigned SRThreshold; 110 111 void MarkUnsafe(AllocaInfo &I, Instruction *User) { 112 I.isUnsafe = true; 113 DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n'); 114 } 115 116 bool isSafeAllocaToScalarRepl(AllocaInst *AI); 117 118 void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info); 119 void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); 120 void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, 121 const Type *MemOpType, bool isStore, AllocaInfo &Info, 122 Instruction *TheAccess); 123 bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); 124 uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, 125 const Type *&IdxTy); 126 127 void DoScalarReplacement(AllocaInst *AI, 128 std::vector<AllocaInst*> &WorkList); 129 void DeleteDeadInstructions(); 130 131 void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, 132 SmallVector<AllocaInst*, 32> &NewElts); 133 void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, 134 SmallVector<AllocaInst*, 32> &NewElts); 135 void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, 136 SmallVector<AllocaInst*, 32> &NewElts); 137 void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, 138 AllocaInst *AI, 139 SmallVector<AllocaInst*, 32> &NewElts); 140 void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, 141 SmallVector<AllocaInst*, 32> &NewElts); 142 void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, 143 SmallVector<AllocaInst*, 32> &NewElts); 144 145 static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); 146 }; 147 148 // SROA_DT - SROA that uses DominatorTree. 149 struct SROA_DT : public SROA { 150 static char ID; 151 public: 152 SROA_DT(int T = -1) : SROA(T, true, ID) { 153 initializeSROA_DTPass(*PassRegistry::getPassRegistry()); 154 } 155 156 // getAnalysisUsage - This pass does not require any passes, but we know it 157 // will not alter the CFG, so say so. 158 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 159 AU.addRequired<DominatorTree>(); 160 AU.setPreservesCFG(); 161 } 162 }; 163 164 // SROA_SSAUp - SROA that uses SSAUpdater. 165 struct SROA_SSAUp : public SROA { 166 static char ID; 167 public: 168 SROA_SSAUp(int T = -1) : SROA(T, false, ID) { 169 initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry()); 170 } 171 172 // getAnalysisUsage - This pass does not require any passes, but we know it 173 // will not alter the CFG, so say so. 
174 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 175 AU.setPreservesCFG(); 176 } 177 }; 178 179} 180 181char SROA_DT::ID = 0; 182char SROA_SSAUp::ID = 0; 183 184INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl", 185 "Scalar Replacement of Aggregates (DT)", false, false) 186INITIALIZE_PASS_DEPENDENCY(DominatorTree) 187INITIALIZE_PASS_END(SROA_DT, "scalarrepl", 188 "Scalar Replacement of Aggregates (DT)", false, false) 189 190INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa", 191 "Scalar Replacement of Aggregates (SSAUp)", false, false) 192INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa", 193 "Scalar Replacement of Aggregates (SSAUp)", false, false) 194 195// Public interface to the ScalarReplAggregates pass 196FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold, 197 bool UseDomTree) { 198 if (UseDomTree) 199 return new SROA_DT(Threshold); 200 return new SROA_SSAUp(Threshold); 201} 202 203 204//===----------------------------------------------------------------------===// 205// Convert To Scalar Optimization. 206//===----------------------------------------------------------------------===// 207 208namespace { 209/// ConvertToScalarInfo - This class implements the "Convert To Scalar" 210/// optimization, which scans the uses of an alloca and determines if it can 211/// rewrite it in terms of a single new alloca that can be mem2reg'd. 212class ConvertToScalarInfo { 213 /// AllocaSize - The size of the alloca being considered. 214 unsigned AllocaSize; 215 const TargetData &TD; 216 217 /// IsNotTrivial - This is set to true if there is some access to the object 218 /// which means that mem2reg can't promote it. 219 bool IsNotTrivial; 220 221 /// VectorTy - This tracks the type that we should promote the vector to if 222 /// it is possible to turn it into a vector. This starts out null, and if it 223 /// isn't possible to turn into a vector type, it gets set to VoidTy. 224 const Type *VectorTy; 225 226 /// HadAVector - True if there is at least one vector access to the alloca. 227 /// We don't want to turn random arrays into vectors and use vector element 228 /// insert/extract, but if there are element accesses to something that is 229 /// also declared as a vector, we do want to promote to a vector. 230 bool HadAVector; 231 232public: 233 explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) 234 : AllocaSize(Size), TD(td) { 235 IsNotTrivial = false; 236 VectorTy = 0; 237 HadAVector = false; 238 } 239 240 AllocaInst *TryConvert(AllocaInst *AI); 241 242private: 243 bool CanConvertToScalar(Value *V, uint64_t Offset); 244 void MergeInType(const Type *In, uint64_t Offset); 245 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); 246 247 Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, 248 uint64_t Offset, IRBuilder<> &Builder); 249 Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, 250 uint64_t Offset, IRBuilder<> &Builder); 251}; 252} // end anonymous namespace. 253 254 255/// TryConvert - Analyze the specified alloca, and if it is safe to do so, 256/// rewrite it to be a new alloca which is mem2reg'able. This returns the new 257/// alloca if possible or null if not. 258AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { 259 // If we can't convert this scalar, or if mem2reg can trivially do it, bail 260 // out. 
  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
    return 0;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  const Type *NewTy;
  if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
          << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
  }
  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0);
  return NewAI;
}

/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.  We mark this by setting VectorTy
///      to VoidTy.
void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (VectorTy && VectorTy->isVoidTy())
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
    // Remember if we saw a vector type.
    HadAVector = true;

    if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
      // If we're storing/loading a vector of the right size, allow it as a
      // vector.  If this is the first vector we see, remember the type so that
      // we know the element size.  If this is a subsequent access, ignore it
      // even if it is a differing type but the same size.  Worst case we can
      // bitcast the resultant vectors.
      if (VectorTy == 0)
        VectorTy = VInTy;
      return;
    }
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (VectorTy == 0 ||
         cast<VectorType>(VectorTy)->getElementType()
               ->getPrimitiveSizeInBits()/8 == EltSize)) {
      if (VectorTy == 0)
        VectorTy = VectorType::get(In, AllocaSize/EltSize);
      return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  VectorTy = Type::getVoidTy(In->getContext());
}

/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value as a vector type, set the
/// HadAVector flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (LI->isVolatile())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      MergeInType(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      MergeInType(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               &Indices[0], Indices.size());
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value and constant size.
      if (!isa<ConstantInt>(MSI->getValue()) ||
          !isa<ConstantInt>(MSI->getLength()))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
413 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { 414 ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); 415 if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) 416 return false; 417 418 IsNotTrivial = true; // Can't be mem2reg'd. 419 continue; 420 } 421 422 // Otherwise, we cannot handle this! 423 return false; 424 } 425 426 return true; 427} 428 429/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca 430/// directly. This happens when we are converting an "integer union" to a 431/// single integer scalar, or when we are converting a "vector union" to a 432/// vector with insert/extractelement instructions. 433/// 434/// Offset is an offset from the original alloca, in bits that need to be 435/// shifted to the right. By the end of this, there should be no uses of Ptr. 436void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, 437 uint64_t Offset) { 438 while (!Ptr->use_empty()) { 439 Instruction *User = cast<Instruction>(Ptr->use_back()); 440 441 if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { 442 ConvertUsesToScalar(CI, NewAI, Offset); 443 CI->eraseFromParent(); 444 continue; 445 } 446 447 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { 448 // Compute the offset that this GEP adds to the pointer. 449 SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); 450 uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), 451 &Indices[0], Indices.size()); 452 ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); 453 GEP->eraseFromParent(); 454 continue; 455 } 456 457 IRBuilder<> Builder(User); 458 459 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 460 // The load is a bit extract from NewAI shifted right by Offset bits. 461 Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); 462 Value *NewLoadVal 463 = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); 464 LI->replaceAllUsesWith(NewLoadVal); 465 LI->eraseFromParent(); 466 continue; 467 } 468 469 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 470 assert(SI->getOperand(0) != Ptr && "Consistency error!"); 471 Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); 472 Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, 473 Builder); 474 Builder.CreateStore(New, NewAI); 475 SI->eraseFromParent(); 476 477 // If the load we just inserted is now dead, then the inserted store 478 // overwrote the entire thing. 479 if (Old->use_empty()) 480 Old->eraseFromParent(); 481 continue; 482 } 483 484 // If this is a constant sized memset of a constant value (e.g. 0) we can 485 // transform it into a store of the expanded constant value. 486 if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { 487 assert(MSI->getRawDest() == Ptr && "Consistency error!"); 488 unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 489 if (NumBytes != 0) { 490 unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); 491 492 // Compute the value replicated the right number of times. 493 APInt APVal(NumBytes*8, Val); 494 495 // Splat the value if non-zero. 496 if (Val) 497 for (unsigned i = 1; i != NumBytes; ++i) 498 APVal |= APVal << 8; 499 500 Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); 501 Value *New = ConvertScalar_InsertValue( 502 ConstantInt::get(User->getContext(), APVal), 503 Old, Offset, Builder); 504 Builder.CreateStore(New, NewAI); 505 506 // If the load we just inserted is now dead, then the memset overwrote 507 // the entire thing. 
508 if (Old->use_empty()) 509 Old->eraseFromParent(); 510 } 511 MSI->eraseFromParent(); 512 continue; 513 } 514 515 // If this is a memcpy or memmove into or out of the whole allocation, we 516 // can handle it like a load or store of the scalar type. 517 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { 518 assert(Offset == 0 && "must be store to start of alloca"); 519 520 // If the source and destination are both to the same alloca, then this is 521 // a noop copy-to-self, just delete it. Otherwise, emit a load and store 522 // as appropriate. 523 AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, 0)); 524 525 if (GetUnderlyingObject(MTI->getSource(), 0) != OrigAI) { 526 // Dest must be OrigAI, change this to be a load from the original 527 // pointer (bitcasted), then a store to our new alloca. 528 assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); 529 Value *SrcPtr = MTI->getSource(); 530 const PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); 531 const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); 532 if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { 533 AIPTy = PointerType::get(AIPTy->getElementType(), 534 SPTy->getAddressSpace()); 535 } 536 SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy); 537 538 LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); 539 SrcVal->setAlignment(MTI->getAlignment()); 540 Builder.CreateStore(SrcVal, NewAI); 541 } else if (GetUnderlyingObject(MTI->getDest(), 0) != OrigAI) { 542 // Src must be OrigAI, change this to be a load from NewAI then a store 543 // through the original dest pointer (bitcasted). 544 assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); 545 LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); 546 547 const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); 548 const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); 549 if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { 550 AIPTy = PointerType::get(AIPTy->getElementType(), 551 DPTy->getAddressSpace()); 552 } 553 Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy); 554 555 StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); 556 NewStore->setAlignment(MTI->getAlignment()); 557 } else { 558 // Noop transfer. Src == Dst 559 } 560 561 MTI->eraseFromParent(); 562 continue; 563 } 564 565 llvm_unreachable("Unsupported operation!"); 566 } 567} 568 569/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer 570/// or vector value FromVal, extracting the bits from the offset specified by 571/// Offset. This returns the value, which is of type ToType. 572/// 573/// This happens when we are converting an "integer union" to a single 574/// integer scalar, or when we are converting a "vector union" to a vector with 575/// insert/extractelement instructions. 576/// 577/// Offset is an offset from the original alloca, in bits that need to be 578/// shifted to the right. 579Value *ConvertToScalarInfo:: 580ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, 581 uint64_t Offset, IRBuilder<> &Builder) { 582 // If the load is of the whole new alloca, no conversion is needed. 583 if (FromVal->getType() == ToType && Offset == 0) 584 return FromVal; 585 586 // If the result alloca is a vector type, this is either an element 587 // access or a bitcast to another vector type of the same size. 
588 if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { 589 if (ToType->isVectorTy()) 590 return Builder.CreateBitCast(FromVal, ToType, "tmp"); 591 592 // Otherwise it must be an element access. 593 unsigned Elt = 0; 594 if (Offset) { 595 unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); 596 Elt = Offset/EltSize; 597 assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); 598 } 599 // Return the element extracted out of it. 600 Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( 601 Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); 602 if (V->getType() != ToType) 603 V = Builder.CreateBitCast(V, ToType, "tmp"); 604 return V; 605 } 606 607 // If ToType is a first class aggregate, extract out each of the pieces and 608 // use insertvalue's to form the FCA. 609 if (const StructType *ST = dyn_cast<StructType>(ToType)) { 610 const StructLayout &Layout = *TD.getStructLayout(ST); 611 Value *Res = UndefValue::get(ST); 612 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { 613 Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), 614 Offset+Layout.getElementOffsetInBits(i), 615 Builder); 616 Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); 617 } 618 return Res; 619 } 620 621 if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { 622 uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); 623 Value *Res = UndefValue::get(AT); 624 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 625 Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), 626 Offset+i*EltSize, Builder); 627 Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); 628 } 629 return Res; 630 } 631 632 // Otherwise, this must be a union that was converted to an integer value. 633 const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); 634 635 // If this is a big-endian system and the load is narrower than the 636 // full alloca type, we need to do a shift to get the right bits. 637 int ShAmt = 0; 638 if (TD.isBigEndian()) { 639 // On big-endian machines, the lowest bit is stored at the bit offset 640 // from the pointer given by getTypeStoreSizeInBits. This matters for 641 // integers with a bitwidth that is not a multiple of 8. 642 ShAmt = TD.getTypeStoreSizeInBits(NTy) - 643 TD.getTypeStoreSizeInBits(ToType) - Offset; 644 } else { 645 ShAmt = Offset; 646 } 647 648 // Note: we support negative bitwidths (with shl) which are not defined. 649 // We do this to support (f.e.) loads off the end of a structure where 650 // only some bits are used. 651 if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) 652 FromVal = Builder.CreateLShr(FromVal, 653 ConstantInt::get(FromVal->getType(), 654 ShAmt), "tmp"); 655 else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) 656 FromVal = Builder.CreateShl(FromVal, 657 ConstantInt::get(FromVal->getType(), 658 -ShAmt), "tmp"); 659 660 // Finally, unconditionally truncate the integer to the right width. 661 unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); 662 if (LIBitWidth < NTy->getBitWidth()) 663 FromVal = 664 Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), 665 LIBitWidth), "tmp"); 666 else if (LIBitWidth > NTy->getBitWidth()) 667 FromVal = 668 Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), 669 LIBitWidth), "tmp"); 670 671 // If the result is an integer, this is a trunc or bitcast. 672 if (ToType->isIntegerTy()) { 673 // Should be done. 
674 } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { 675 // Just do a bitcast, we know the sizes match up. 676 FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); 677 } else { 678 // Otherwise must be a pointer. 679 FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); 680 } 681 assert(FromVal->getType() == ToType && "Didn't convert right?"); 682 return FromVal; 683} 684 685/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer 686/// or vector value "Old" at the offset specified by Offset. 687/// 688/// This happens when we are converting an "integer union" to a 689/// single integer scalar, or when we are converting a "vector union" to a 690/// vector with insert/extractelement instructions. 691/// 692/// Offset is an offset from the original alloca, in bits that need to be 693/// shifted to the right. 694Value *ConvertToScalarInfo:: 695ConvertScalar_InsertValue(Value *SV, Value *Old, 696 uint64_t Offset, IRBuilder<> &Builder) { 697 // Convert the stored type to the actual type, shift it left to insert 698 // then 'or' into place. 699 const Type *AllocaType = Old->getType(); 700 LLVMContext &Context = Old->getContext(); 701 702 if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { 703 uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); 704 uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); 705 706 // Changing the whole vector with memset or with an access of a different 707 // vector type? 708 if (ValSize == VecSize) 709 return Builder.CreateBitCast(SV, AllocaType, "tmp"); 710 711 uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); 712 713 // Must be an element insertion. 714 unsigned Elt = Offset/EltSize; 715 716 if (SV->getType() != VTy->getElementType()) 717 SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); 718 719 SV = Builder.CreateInsertElement(Old, SV, 720 ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), 721 "tmp"); 722 return SV; 723 } 724 725 // If SV is a first-class aggregate value, insert each value recursively. 726 if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { 727 const StructLayout &Layout = *TD.getStructLayout(ST); 728 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { 729 Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); 730 Old = ConvertScalar_InsertValue(Elt, Old, 731 Offset+Layout.getElementOffsetInBits(i), 732 Builder); 733 } 734 return Old; 735 } 736 737 if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { 738 uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); 739 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 740 Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); 741 Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); 742 } 743 return Old; 744 } 745 746 // If SV is a float, convert it to the appropriate integer type. 747 // If it is a pointer, do the same. 748 unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType()); 749 unsigned DestWidth = TD.getTypeSizeInBits(AllocaType); 750 unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); 751 unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); 752 if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) 753 SV = Builder.CreateBitCast(SV, 754 IntegerType::get(SV->getContext(),SrcWidth), "tmp"); 755 else if (SV->getType()->isPointerTy()) 756 SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp"); 757 758 // Zero extend or truncate the value if needed. 
759 if (SV->getType() != AllocaType) { 760 if (SV->getType()->getPrimitiveSizeInBits() < 761 AllocaType->getPrimitiveSizeInBits()) 762 SV = Builder.CreateZExt(SV, AllocaType, "tmp"); 763 else { 764 // Truncation may be needed if storing more than the alloca can hold 765 // (undefined behavior). 766 SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); 767 SrcWidth = DestWidth; 768 SrcStoreWidth = DestStoreWidth; 769 } 770 } 771 772 // If this is a big-endian system and the store is narrower than the 773 // full alloca type, we need to do a shift to get the right bits. 774 int ShAmt = 0; 775 if (TD.isBigEndian()) { 776 // On big-endian machines, the lowest bit is stored at the bit offset 777 // from the pointer given by getTypeStoreSizeInBits. This matters for 778 // integers with a bitwidth that is not a multiple of 8. 779 ShAmt = DestStoreWidth - SrcStoreWidth - Offset; 780 } else { 781 ShAmt = Offset; 782 } 783 784 // Note: we support negative bitwidths (with shr) which are not defined. 785 // We do this to support (f.e.) stores off the end of a structure where 786 // only some bits in the structure are set. 787 APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); 788 if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { 789 SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), 790 ShAmt), "tmp"); 791 Mask <<= ShAmt; 792 } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { 793 SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), 794 -ShAmt), "tmp"); 795 Mask = Mask.lshr(-ShAmt); 796 } 797 798 // Mask out the bits we are about to insert from the old value, and or 799 // in the new bits. 800 if (SrcWidth != DestWidth) { 801 assert(DestWidth > SrcWidth); 802 Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); 803 SV = Builder.CreateOr(Old, SV, "ins"); 804 } 805 return SV; 806} 807 808 809//===----------------------------------------------------------------------===// 810// SRoA Driver 811//===----------------------------------------------------------------------===// 812 813 814bool SROA::runOnFunction(Function &F) { 815 TD = getAnalysisIfAvailable<TargetData>(); 816 817 bool Changed = performPromotion(F); 818 819 // FIXME: ScalarRepl currently depends on TargetData more than it 820 // theoretically needs to. It should be refactored in order to support 821 // target-independent IR. Until this is done, just skip the actual 822 // scalar-replacement portion of this pass. 823 if (!TD) return Changed; 824 825 while (1) { 826 bool LocalChange = performScalarRepl(F); 827 if (!LocalChange) break; // No need to repromote if no scalarrepl 828 Changed = true; 829 LocalChange = performPromotion(F); 830 if (!LocalChange) break; // No need to re-scalarrepl if no promotion 831 } 832 833 return Changed; 834} 835 836namespace { 837class AllocaPromoter : public LoadAndStorePromoter { 838 AllocaInst *AI; 839public: 840 AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S) 841 : LoadAndStorePromoter(Insts, S), AI(0) {} 842 843 void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { 844 // Remember which alloca we're promoting (for isInstInList). 
845 this->AI = AI; 846 LoadAndStorePromoter::run(Insts); 847 AI->eraseFromParent(); 848 } 849 850 virtual bool isInstInList(Instruction *I, 851 const SmallVectorImpl<Instruction*> &Insts) const { 852 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 853 return LI->getOperand(0) == AI; 854 return cast<StoreInst>(I)->getPointerOperand() == AI; 855 } 856}; 857} // end anon namespace 858 859bool SROA::performPromotion(Function &F) { 860 std::vector<AllocaInst*> Allocas; 861 DominatorTree *DT = 0; 862 if (HasDomTree) 863 DT = &getAnalysis<DominatorTree>(); 864 865 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 866 867 bool Changed = false; 868 SmallVector<Instruction*, 64> Insts; 869 while (1) { 870 Allocas.clear(); 871 872 // Find allocas that are safe to promote, by looking at all instructions in 873 // the entry node 874 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 875 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 876 if (isAllocaPromotable(AI)) 877 Allocas.push_back(AI); 878 879 if (Allocas.empty()) break; 880 881 if (HasDomTree) 882 PromoteMemToReg(Allocas, *DT); 883 else { 884 SSAUpdater SSA; 885 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 886 AllocaInst *AI = Allocas[i]; 887 888 // Build list of instructions to promote. 889 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 890 UI != E; ++UI) 891 Insts.push_back(cast<Instruction>(*UI)); 892 893 AllocaPromoter(Insts, SSA).run(AI, Insts); 894 Insts.clear(); 895 } 896 } 897 NumPromoted += Allocas.size(); 898 Changed = true; 899 } 900 901 return Changed; 902} 903 904 905/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for 906/// SROA. It must be a struct or array type with a small number of elements. 907static bool ShouldAttemptScalarRepl(AllocaInst *AI) { 908 const Type *T = AI->getAllocatedType(); 909 // Do not promote any struct into more than 32 separate vars. 910 if (const StructType *ST = dyn_cast<StructType>(T)) 911 return ST->getNumElements() <= 32; 912 // Arrays are much less likely to be safe for SROA; only consider 913 // them if they are very small. 914 if (const ArrayType *AT = dyn_cast<ArrayType>(T)) 915 return AT->getNumElements() <= 8; 916 return false; 917} 918 919 920// performScalarRepl - This algorithm is a simple worklist driven algorithm, 921// which runs on all of the malloc/alloca instructions in the function, removing 922// them if they are only used by getelementptr instructions. 923// 924bool SROA::performScalarRepl(Function &F) { 925 std::vector<AllocaInst*> WorkList; 926 927 // Scan the entry basic block, adding allocas to the worklist. 928 BasicBlock &BB = F.getEntryBlock(); 929 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 930 if (AllocaInst *A = dyn_cast<AllocaInst>(I)) 931 WorkList.push_back(A); 932 933 // Process the worklist 934 bool Changed = false; 935 while (!WorkList.empty()) { 936 AllocaInst *AI = WorkList.back(); 937 WorkList.pop_back(); 938 939 // Handle dead allocas trivially. These can be formed by SROA'ing arrays 940 // with unused elements. 941 if (AI->use_empty()) { 942 AI->eraseFromParent(); 943 Changed = true; 944 continue; 945 } 946 947 // If this alloca is impossible for us to promote, reject it early. 948 if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) 949 continue; 950 951 // Check to see if this allocation is only modified by a memcpy/memmove from 952 // a constant global. 
If this is the case, we can change all users to use 953 // the constant global instead. This is commonly produced by the CFE by 954 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 955 // is only subsequently read. 956 if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { 957 DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); 958 DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); 959 Constant *TheSrc = cast<Constant>(TheCopy->getSource()); 960 AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); 961 TheCopy->eraseFromParent(); // Don't mutate the global. 962 AI->eraseFromParent(); 963 ++NumGlobals; 964 Changed = true; 965 continue; 966 } 967 968 // Check to see if we can perform the core SROA transformation. We cannot 969 // transform the allocation instruction if it is an array allocation 970 // (allocations OF arrays are ok though), and an allocation of a scalar 971 // value cannot be decomposed at all. 972 uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); 973 974 // Do not promote [0 x %struct]. 975 if (AllocaSize == 0) continue; 976 977 // Do not promote any struct whose size is too big. 978 if (AllocaSize > SRThreshold) continue; 979 980 // If the alloca looks like a good candidate for scalar replacement, and if 981 // all its users can be transformed, then split up the aggregate into its 982 // separate elements. 983 if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { 984 DoScalarReplacement(AI, WorkList); 985 Changed = true; 986 continue; 987 } 988 989 // If we can turn this aggregate value (potentially with casts) into a 990 // simple scalar value that can be mem2reg'd into a register value. 991 // IsNotTrivial tracks whether this is something that mem2reg could have 992 // promoted itself. If so, we don't want to transform it needlessly. Note 993 // that we can't just check based on the type: the alloca may be of an i32 994 // but that has pointer arithmetic to set byte 3 of it or something. 995 if (AllocaInst *NewAI = 996 ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { 997 NewAI->takeName(AI); 998 AI->eraseFromParent(); 999 ++NumConverted; 1000 Changed = true; 1001 continue; 1002 } 1003 1004 // Otherwise, couldn't process this alloca. 1005 } 1006 1007 return Changed; 1008} 1009 1010/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 1011/// predicate, do SROA now. 1012void SROA::DoScalarReplacement(AllocaInst *AI, 1013 std::vector<AllocaInst*> &WorkList) { 1014 DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); 1015 SmallVector<AllocaInst*, 32> ElementAllocas; 1016 if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 1017 ElementAllocas.reserve(ST->getNumContainedTypes()); 1018 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 1019 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 1020 AI->getAlignment(), 1021 AI->getName() + "." + Twine(i), AI); 1022 ElementAllocas.push_back(NA); 1023 WorkList.push_back(NA); // Add to worklist for recursive processing 1024 } 1025 } else { 1026 const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 1027 ElementAllocas.reserve(AT->getNumElements()); 1028 const Type *ElTy = AT->getElementType(); 1029 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 1030 AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), 1031 AI->getName() + "." 
+ Twine(i), AI); 1032 ElementAllocas.push_back(NA); 1033 WorkList.push_back(NA); // Add to worklist for recursive processing 1034 } 1035 } 1036 1037 // Now that we have created the new alloca instructions, rewrite all the 1038 // uses of the old alloca. 1039 RewriteForScalarRepl(AI, AI, 0, ElementAllocas); 1040 1041 // Now erase any instructions that were made dead while rewriting the alloca. 1042 DeleteDeadInstructions(); 1043 AI->eraseFromParent(); 1044 1045 ++NumReplaced; 1046} 1047 1048/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, 1049/// recursively including all their operands that become trivially dead. 1050void SROA::DeleteDeadInstructions() { 1051 while (!DeadInsts.empty()) { 1052 Instruction *I = cast<Instruction>(DeadInsts.pop_back_val()); 1053 1054 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) 1055 if (Instruction *U = dyn_cast<Instruction>(*OI)) { 1056 // Zero out the operand and see if it becomes trivially dead. 1057 // (But, don't add allocas to the dead instruction list -- they are 1058 // already on the worklist and will be deleted separately.) 1059 *OI = 0; 1060 if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) 1061 DeadInsts.push_back(U); 1062 } 1063 1064 I->eraseFromParent(); 1065 } 1066} 1067 1068/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to 1069/// performing scalar replacement of alloca AI. The results are flagged in 1070/// the Info parameter. Offset indicates the position within AI that is 1071/// referenced by this instruction. 1072void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, 1073 AllocaInfo &Info) { 1074 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { 1075 Instruction *User = cast<Instruction>(*UI); 1076 1077 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 1078 isSafeForScalarRepl(BC, Offset, Info); 1079 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1080 uint64_t GEPOffset = Offset; 1081 isSafeGEP(GEPI, GEPOffset, Info); 1082 if (!Info.isUnsafe) 1083 isSafeForScalarRepl(GEPI, GEPOffset, Info); 1084 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 1085 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 1086 if (Length == 0) 1087 return MarkUnsafe(Info, User); 1088 isSafeMemAccess(Offset, Length->getZExtValue(), 0, 1089 UI.getOperandNo() == 0, Info, MI); 1090 } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1091 if (LI->isVolatile()) 1092 return MarkUnsafe(Info, User); 1093 const Type *LIType = LI->getType(); 1094 isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), 1095 LIType, false, Info, LI); 1096 Info.hasALoadOrStore = true; 1097 1098 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1099 // Store is ok if storing INTO the pointer, not storing the pointer 1100 if (SI->isVolatile() || SI->getOperand(0) == I) 1101 return MarkUnsafe(Info, User); 1102 1103 const Type *SIType = SI->getOperand(0)->getType(); 1104 isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), 1105 SIType, true, Info, SI); 1106 Info.hasALoadOrStore = true; 1107 } else { 1108 return MarkUnsafe(Info, User); 1109 } 1110 if (Info.isUnsafe) return; 1111 } 1112} 1113 1114/// isSafeGEP - Check if a GEP instruction can be handled for scalar 1115/// replacement. It is safe when all the indices are constant, in-bounds 1116/// references, and when the resulting offset corresponds to an element within 1117/// the alloca type. The results are flagged in the Info parameter. 
Upon 1118/// return, Offset is adjusted as specified by the GEP indices. 1119void SROA::isSafeGEP(GetElementPtrInst *GEPI, 1120 uint64_t &Offset, AllocaInfo &Info) { 1121 gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); 1122 if (GEPIt == E) 1123 return; 1124 1125 // Walk through the GEP type indices, checking the types that this indexes 1126 // into. 1127 for (; GEPIt != E; ++GEPIt) { 1128 // Ignore struct elements, no extra checking needed for these. 1129 if ((*GEPIt)->isStructTy()) 1130 continue; 1131 1132 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); 1133 if (!IdxVal) 1134 return MarkUnsafe(Info, GEPI); 1135 } 1136 1137 // Compute the offset due to this GEP and check if the alloca has a 1138 // component element at that offset. 1139 SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); 1140 Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), 1141 &Indices[0], Indices.size()); 1142 if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) 1143 MarkUnsafe(Info, GEPI); 1144} 1145 1146/// isHomogeneousAggregate - Check if type T is a struct or array containing 1147/// elements of the same type (which is always true for arrays). If so, 1148/// return true with NumElts and EltTy set to the number of elements and the 1149/// element type, respectively. 1150static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, 1151 const Type *&EltTy) { 1152 if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { 1153 NumElts = AT->getNumElements(); 1154 EltTy = (NumElts == 0 ? 0 : AT->getElementType()); 1155 return true; 1156 } 1157 if (const StructType *ST = dyn_cast<StructType>(T)) { 1158 NumElts = ST->getNumContainedTypes(); 1159 EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); 1160 for (unsigned n = 1; n < NumElts; ++n) { 1161 if (ST->getContainedType(n) != EltTy) 1162 return false; 1163 } 1164 return true; 1165 } 1166 return false; 1167} 1168 1169/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are 1170/// "homogeneous" aggregates with the same element type and number of elements. 1171static bool isCompatibleAggregate(const Type *T1, const Type *T2) { 1172 if (T1 == T2) 1173 return true; 1174 1175 unsigned NumElts1, NumElts2; 1176 const Type *EltTy1, *EltTy2; 1177 if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && 1178 isHomogeneousAggregate(T2, NumElts2, EltTy2) && 1179 NumElts1 == NumElts2 && 1180 EltTy1 == EltTy2) 1181 return true; 1182 1183 return false; 1184} 1185 1186/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI 1187/// alloca or has an offset and size that corresponds to a component element 1188/// within it. The offset checked here may have been formed from a GEP with a 1189/// pointer bitcasted to a different type. 1190void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, 1191 const Type *MemOpType, bool isStore, 1192 AllocaInfo &Info, Instruction *TheAccess) { 1193 // Check if this is a load/store of the entire alloca. 1194 if (Offset == 0 && 1195 MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) { 1196 // This can be safe for MemIntrinsics (where MemOpType is 0) and integer 1197 // loads/stores (which are essentially the same as the MemIntrinsics with 1198 // regard to copying padding between elements). But, if an alloca is 1199 // flagged as both a source and destination of such operations, we'll need 1200 // to check later for padding between elements. 
1201 if (!MemOpType || MemOpType->isIntegerTy()) { 1202 if (isStore) 1203 Info.isMemCpyDst = true; 1204 else 1205 Info.isMemCpySrc = true; 1206 return; 1207 } 1208 // This is also safe for references using a type that is compatible with 1209 // the type of the alloca, so that loads/stores can be rewritten using 1210 // insertvalue/extractvalue. 1211 if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { 1212 Info.hasSubelementAccess = true; 1213 return; 1214 } 1215 } 1216 // Check if the offset/size correspond to a component within the alloca type. 1217 const Type *T = Info.AI->getAllocatedType(); 1218 if (TypeHasComponent(T, Offset, MemSize)) { 1219 Info.hasSubelementAccess = true; 1220 return; 1221 } 1222 1223 return MarkUnsafe(Info, TheAccess); 1224} 1225 1226/// TypeHasComponent - Return true if T has a component type with the 1227/// specified offset and size. If Size is zero, do not check the size. 1228bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { 1229 const Type *EltTy; 1230 uint64_t EltSize; 1231 if (const StructType *ST = dyn_cast<StructType>(T)) { 1232 const StructLayout *Layout = TD->getStructLayout(ST); 1233 unsigned EltIdx = Layout->getElementContainingOffset(Offset); 1234 EltTy = ST->getContainedType(EltIdx); 1235 EltSize = TD->getTypeAllocSize(EltTy); 1236 Offset -= Layout->getElementOffset(EltIdx); 1237 } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { 1238 EltTy = AT->getElementType(); 1239 EltSize = TD->getTypeAllocSize(EltTy); 1240 if (Offset >= AT->getNumElements() * EltSize) 1241 return false; 1242 Offset %= EltSize; 1243 } else { 1244 return false; 1245 } 1246 if (Offset == 0 && (Size == 0 || EltSize == Size)) 1247 return true; 1248 // Check if the component spans multiple elements. 1249 if (Offset + Size > EltSize) 1250 return false; 1251 return TypeHasComponent(EltTy, Offset, Size); 1252} 1253 1254/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite 1255/// the instruction I, which references it, to use the separate elements. 1256/// Offset indicates the position within AI that is referenced by this 1257/// instruction. 1258void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, 1259 SmallVector<AllocaInst*, 32> &NewElts) { 1260 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { 1261 Instruction *User = cast<Instruction>(*UI); 1262 1263 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 1264 RewriteBitCast(BC, AI, Offset, NewElts); 1265 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1266 RewriteGEP(GEPI, AI, Offset, NewElts); 1267 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 1268 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 1269 uint64_t MemSize = Length->getZExtValue(); 1270 if (Offset == 0 && 1271 MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) 1272 RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); 1273 // Otherwise the intrinsic can only touch a single element and the 1274 // address operand will be updated, so nothing else needs to be done. 
1275 } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1276 const Type *LIType = LI->getType(); 1277 1278 if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { 1279 // Replace: 1280 // %res = load { i32, i32 }* %alloc 1281 // with: 1282 // %load.0 = load i32* %alloc.0 1283 // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 1284 // %load.1 = load i32* %alloc.1 1285 // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 1286 // (Also works for arrays instead of structs) 1287 Value *Insert = UndefValue::get(LIType); 1288 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1289 Value *Load = new LoadInst(NewElts[i], "load", LI); 1290 Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); 1291 } 1292 LI->replaceAllUsesWith(Insert); 1293 DeadInsts.push_back(LI); 1294 } else if (LIType->isIntegerTy() && 1295 TD->getTypeAllocSize(LIType) == 1296 TD->getTypeAllocSize(AI->getAllocatedType())) { 1297 // If this is a load of the entire alloca to an integer, rewrite it. 1298 RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); 1299 } 1300 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1301 Value *Val = SI->getOperand(0); 1302 const Type *SIType = Val->getType(); 1303 if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { 1304 // Replace: 1305 // store { i32, i32 } %val, { i32, i32 }* %alloc 1306 // with: 1307 // %val.0 = extractvalue { i32, i32 } %val, 0 1308 // store i32 %val.0, i32* %alloc.0 1309 // %val.1 = extractvalue { i32, i32 } %val, 1 1310 // store i32 %val.1, i32* %alloc.1 1311 // (Also works for arrays instead of structs) 1312 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1313 Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); 1314 new StoreInst(Extract, NewElts[i], SI); 1315 } 1316 DeadInsts.push_back(SI); 1317 } else if (SIType->isIntegerTy() && 1318 TD->getTypeAllocSize(SIType) == 1319 TD->getTypeAllocSize(AI->getAllocatedType())) { 1320 // If this is a store of the entire alloca from an integer, rewrite it. 1321 RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); 1322 } 1323 } 1324 } 1325} 1326 1327/// RewriteBitCast - Update a bitcast reference to the alloca being replaced 1328/// and recursively continue updating all of its uses. 1329void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, 1330 SmallVector<AllocaInst*, 32> &NewElts) { 1331 RewriteForScalarRepl(BC, AI, Offset, NewElts); 1332 if (BC->getOperand(0) != AI) 1333 return; 1334 1335 // The bitcast references the original alloca. Replace its uses with 1336 // references to the first new element alloca. 1337 Instruction *Val = NewElts[0]; 1338 if (Val->getType() != BC->getDestTy()) { 1339 Val = new BitCastInst(Val, BC->getDestTy(), "", BC); 1340 Val->takeName(BC); 1341 } 1342 BC->replaceAllUsesWith(Val); 1343 DeadInsts.push_back(BC); 1344} 1345 1346/// FindElementAndOffset - Return the index of the element containing Offset 1347/// within the specified type, which must be either a struct or an array. 1348/// Sets T to the type of the element and Offset to the offset within that 1349/// element. IdxTy is set to the type of the index result to be used in a 1350/// GEP instruction. 
uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
                                    const Type *&IdxTy) {
  uint64_t Idx = 0;
  if (const StructType *ST = dyn_cast<StructType>(T)) {
    const StructLayout *Layout = TD->getStructLayout(ST);
    Idx = Layout->getElementContainingOffset(Offset);
    T = ST->getContainedType(Idx);
    Offset -= Layout->getElementOffset(Idx);
    IdxTy = Type::getInt32Ty(T->getContext());
    return Idx;
  }
  const ArrayType *AT = cast<ArrayType>(T);
  T = AT->getElementType();
  uint64_t EltSize = TD->getTypeAllocSize(T);
  Idx = Offset / EltSize;
  Offset -= Idx * EltSize;
  IdxTy = Type::getInt64Ty(T->getContext());
  return Idx;
}

/// RewriteGEP - Check if this GEP instruction moves the pointer across
/// elements of the alloca that are being split apart, and if so, rewrite
/// the GEP to be relative to the new element.
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                      SmallVector<AllocaInst*, 32> &NewElts) {
  uint64_t OldOffset = Offset;
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                 &Indices[0], Indices.size());

  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);

  const Type *T = AI->getAllocatedType();
  const Type *IdxTy;
  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
  if (GEPI->getOperand(0) == AI)
    OldIdx = ~0ULL; // Force the GEP to be rewritten.

  T = AI->getAllocatedType();
  uint64_t EltOffset = Offset;
  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);

  // If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
  if (Idx == OldIdx)
    return;

  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
  SmallVector<Value*, 8> NewArgs;
  NewArgs.push_back(Constant::getNullValue(i32Ty));
  while (EltOffset != 0) {
    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
  }
  Instruction *Val = NewElts[Idx];
  if (NewArgs.size() > 1) {
    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
                                            NewArgs.end(), "", GEPI);
    Val->takeName(GEPI);
  }
  if (Val->getType() != GEPI->getType())
    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
  GEPI->replaceAllUsesWith(Val);
  DeadInsts.push_back(GEPI);
}

/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                        AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type.  The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting. For
  // memset, this Value* stays null.
  Value *OtherPtr = 0;
  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
    if (Inst == MTI->getRawDest())
      OtherPtr = MTI->getRawSource();
    else {
      assert(Inst == MTI->getRawSource());
      OtherPtr = MTI->getRawDest();
    }
  }

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it safely.
  if (OtherPtr) {
    unsigned AddrSpace =
      cast<PointerType>(OtherPtr->getType())->getAddressSpace();

    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
    // optimization, but it's also required to detect the corner case where
    // both pointer operands are referencing the same memory, and where
    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
    // (This function is only called for mem intrinsics that access the whole
    // aggregate, so non-zero GEPs are not an issue here.)
    OtherPtr = OtherPtr->stripPointerCasts();

    // Copying the alloca to itself is a no-op: just delete it.
    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
      // This code will run twice for a no-op memcpy -- once for each operand.
      // Put only one reference to MI on the DeadInsts list.
      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
             E = DeadInsts.end(); I != E; ++I)
        if (*I == MI) return;
      DeadInsts.push_back(MI);
      return;
    }

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    const Type *NewTy =
      PointerType::get(AI->getType()->getElementType(), AddrSpace);

    if (OtherPtr->getType() != NewTy)
      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
  }

  // Process each element of the aggregate.
  bool SROADest = MI->getRawDest() == Inst;

  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = 0;
    unsigned OtherEltAlign = MemAlignment;

    if (OtherPtr) {
      Value *Idx[2] = { Zero,
                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
                                              OtherPtr->getName()+"."+Twine(i),
                                                   MI);
      uint64_t EltOffset;
      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
      const Type *OtherTy = OtherPtrTy->getElementType();
      if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
      } else {
        const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
        EltOffset = TD->getTypeAllocSize(EltTy)*i;
      }

      // The alignment of the other pointer is the guaranteed alignment of the
      // element, which is affected by both the known alignment of the whole
      // mem intrinsic and the alignment of the element.  If the alignment of
      // the memcpy (e.g.) is 32 but the element is at a 4-byte offset, then the
      // known alignment is just 4 bytes.
      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
    }

    Value *EltPtr = NewElts[i];
    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
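    // (A "scalar" here is roughly anything a single load/store can move: an
    // integer, FP, pointer, or vector value, as opposed to a nested
    // aggregate.)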
    if (EltTy->isSingleValueType()) {
      if (isa<MemTransferInst>(MI)) {
        if (SROADest) {
          // From Other to Alloca.
          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
          new StoreInst(Elt, EltPtr, MI);
        } else {
          // From Alloca to Other.
          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
        }
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          const Type *ValTy = EltTy->getScalarType();

          // Construct an integer with the right value.
          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          // Set each byte.
          for (unsigned i = 0; 8*i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
          if (ValTy->isPointerTy())
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPointTy())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy != ValTy) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
            StoreVal = ConstantVector::get(&Elts[0], NumElts);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    unsigned EltSize = TD->getTypeAllocSize(EltTy);

    IRBuilder<> Builder(MI);

    // Finally, insert the meminst for this element.
    if (isa<MemSetInst>(MI)) {
      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
                           MI->isVolatile());
    } else {
      assert(isa<MemTransferInst>(MI));
      Value *Dst = SROADest ? EltPtr : OtherElt;  // Dest ptr
      Value *Src = SROADest ? OtherElt : EltPtr;  // Src ptr

      if (isa<MemCpyInst>(MI))
        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
      else
        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
    }
  }
  DeadInsts.push_back(MI);
}

/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation.  Extract out the pieces of the stored
/// integer and store them individually.
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts){
  // Extract each element out of the integer according to its structure offset
  // and store the element value to the individual alloca.
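  //
  // For example (illustrative), storing i64 %V into an alloca of type
  // { i32, i32 } on a little-endian target becomes roughly:
  //   %elt0 = trunc i64 %V to i32
  //   store i32 %elt0, i32* %alloca.0
  //   %hi   = lshr i64 %V, 32
  //   %elt1 = trunc i64 %hi to i32
  //   store i32 %elt1, i32* %alloca.1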
  Value *SrcVal = SI->getOperand(0);
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  IRBuilder<> Builder(SI);

  // Handle tail padding by extending the operand
  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    SrcVal = Builder.CreateZExt(SrcVal,
                            IntegerType::get(SI->getContext(), AllocaSizeBits));

  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    const StructLayout *Layout = TD->getStructLayout(EltSTy);

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Get the number of bits to shift SrcVal to get the value.
      const Type *FieldTy = EltSTy->getElementType(i);
      uint64_t Shift = Layout->getElementOffsetInBits(i);

      if (TD->isBigEndian())
        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
      }

      // Truncate down to an integer of the right size.
      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

      // Ignore zero sized fields like {}, they obviously contain no data.
      if (FieldSizeBits == 0) continue;

      if (FieldSizeBits != AllocaSizeBits)
        EltVal = Builder.CreateTrunc(EltVal,
                             IntegerType::get(SI->getContext(), FieldSizeBits));
      Value *DestField = NewElts[i];
      if (EltVal->getType() == FieldTy) {
        // Storing to an integer field of this size, just do it.
      } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = Builder.CreateBitCast(EltVal, FieldTy);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = Builder.CreateBitCast(DestField,
                                     PointerType::getUnqual(EltVal->getType()));
      }
      new StoreInst(EltVal, DestField, SI);
    }

  } else {
    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
    const Type *ArrayEltTy = ATy->getElementType();
    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);

    uint64_t Shift;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-ElementOffset;
    else
      Shift = 0;

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Ignore zero sized fields like {}, they obviously contain no data.
      if (ElementSizeBits == 0) continue;

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
      }

      // Truncate down to an integer of the right size.
      if (ElementSizeBits != AllocaSizeBits)
        EltVal = Builder.CreateTrunc(EltVal,
                                     IntegerType::get(SI->getContext(),
                                                      ElementSizeBits));
      Value *DestField = NewElts[i];
      if (EltVal->getType() == ArrayEltTy) {
        // Storing to an integer field of this size, just do it.
      } else if (ArrayEltTy->isFloatingPointTy() ||
                 ArrayEltTy->isVectorTy()) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = Builder.CreateBitCast(DestField,
                                     PointerType::getUnqual(EltVal->getType()));
      }
      new StoreInst(EltVal, DestField, SI);

      if (TD->isBigEndian())
        Shift -= ElementOffset;
      else
        Shift += ElementOffset;
    }
  }

  DeadInsts.push_back(SI);
}

/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer.  Load the individual pieces to form the aggregate value.
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the NewElts according to its structure offset
  // and form the result value.
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
  }

  Value *ResultVal =
    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca.  If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    const Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero sized fields like {}, they obviously contain no data.
    if (FieldSizeBits == 0) continue;

    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
                                                     FieldSizeBits);
    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
        !FieldTy->isVectorTy())
      SrcField = new BitCastInst(SrcField,
                                 PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is an fp or vector of the right size but isn't an
    // integer type, bitcast to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else // Array case.
      Shift = i*ArrayEltBitOffset;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    // Don't create an 'or x, 0' on the first iteration.
    if (!isa<Constant>(ResultVal) ||
        !cast<Constant>(ResultVal)->isNullValue())
      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
    else
      ResultVal = SrcField;
  }

  // Handle tail padding by truncating the result
  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);

  LI->replaceAllUsesWith(ResultVal);
  DeadInsts.push_back(LI);
}

/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Ty = ATy->getElementType();
    return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
  }

  // SROA currently handles only Arrays and Structs.
  const StructType *STy = cast<StructType>(Ty);
  const StructLayout *SL = TD.getStructLayout(STy);
  unsigned PrevFieldBitOffset = 0;
  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

    // Check to see if there is any padding between this element and the
    // previous one.
    if (i) {
      unsigned PrevFieldEnd =
        PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
      if (PrevFieldEnd < FieldBitOffset)
        return true;
    }
    PrevFieldBitOffset = FieldBitOffset;
  }
  // Check for tail padding.
  if (unsigned EltCount = STy->getNumElements()) {
    unsigned PrevFieldEnd = PrevFieldBitOffset +
      TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
    if (PrevFieldEnd < SL->getSizeInBits())
      return true;
  }
  return false;
}

/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
/// aggregate can be broken down into elements.  Return true if it is safe to
/// do so, false otherwise.
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info(AI);

  isSafeForScalarRepl(AI, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
    return false;
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // struct.
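  //
  // For example (illustrative), a struct like { i8, i32 } typically has three
  // padding bytes after the i8; a whole-struct memcpy still copies those
  // bytes, but per-field allocas would have nowhere to keep them.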
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), *TD))
    return false;

  // If the alloca never has an access to just *part* of it, but is accessed
  // via loads and stores, then we should use ConvertToScalarInfo to promote
  // the alloca instead of promoting each piece at a time and inserting fission
  // and fusion code.
  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
    // If the struct/array just has one element, use basic SRoA.
    if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
      if (ST->getNumElements() > 1) return false;
    } else {
      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
        return false;
    }
  }
  return true;
}



/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool PointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return PointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer; return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                                           bool isOffset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore non-volatile loads, they are always ok.
      if (LI->isVolatile()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
                                          isOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is a readonly/readnone call site, then we know it is just a
      // load and we can ignore it.
      if (CS.onlyReadsMemory())
        continue;

      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
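      // (Parameter attribute indices are 1-based, which is why ArgNo+1 is
      // passed to paramHasAttr below.)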
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
        continue;
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we have already seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (isOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!PointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global.  If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
  MemTransferInst *TheCopy = 0;
  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
    return TheCopy;
  return 0;
}

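// For example (illustrative): if %A = alloca [4 x i32] is filled by a single
// memcpy from a constant global @G and is otherwise only read, the returned
// copy lets the caller rewrite reads of %A to read @G directly and delete the
// memcpy.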