ScalarReplAggregates.cpp revision 5a1cb644c903da49dc612a0ba5044505d066259e
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    SROA(int T, bool hasDT, char &ID)
      : FunctionPass(ID), HasDomTree(hasDT) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

  private:
    bool HasDomTree;
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this
    /// captures information about the uses.  All these fields are initialized
    /// to false and set to true when something is learned.
    struct AllocaInfo {
      /// The alloca to promote.
      AllocaInst *AI;

      /// CheckedPHIs - This is a set of verified PHI nodes, to prevent
      /// infinite looping and avoid redundant work.
      SmallPtrSet<PHINode*, 8> CheckedPHIs;

      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      /// hasSubelementAccess - This is true if a subelement of the alloca is
      /// ever accessed, or false if the alloca is only accessed with mem
      /// intrinsics or load/store that only access the entire alloca at once.
      bool hasSubelementAccess : 1;

      /// hasALoadOrStore - This is true if there are any loads or stores to
      /// it.  The alloca may just be accessed with memcpy, for example, which
      /// would not set this.
      bool hasALoadOrStore : 1;

      explicit AllocaInfo(AllocaInst *ai)
        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
          hasSubelementAccess(false), hasALoadOrStore(false) {}
    };

    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I, Instruction *User) {
      I.isUnsafe = true;
      DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
    }

    bool isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                         AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
    void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                         Type *MemOpType, bool isStore, AllocaInfo &Info,
                         Instruction *TheAccess, bool AllowWholeAccess);
    bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
    uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
                                  Type *&IdxTy);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();

    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
                                  uint64_t Offset,
                                  SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
  };

  // SROA_DT - SROA that uses DominatorTree.
  struct SROA_DT : public SROA {
    static char ID;
  public:
    SROA_DT(int T = -1) : SROA(T, true, ID) {
      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass requires dominator information, but we know
    // it will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.setPreservesCFG();
    }
  };

  // SROA_SSAUp - SROA that uses SSAUpdater.
  struct SROA_SSAUp : public SROA {
    static char ID;
  public:
    SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
    }
  };

}

char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
                      "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
                    "Scalar Replacement of Aggregates (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
                    "Scalar Replacement of Aggregates (SSAUp)", false, false)

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
                                                   bool UseDomTree) {
  if (UseDomTree)
    return new SROA_DT(Threshold);
  return new SROA_SSAUp(Threshold);
}


//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//

namespace {
/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
  /// AllocaSize - The size of the alloca being considered in bytes.
  unsigned AllocaSize;
  const TargetData &TD;

  /// IsNotTrivial - This is set to true if there is some access to the object
  /// which means that mem2reg can't promote it.
  bool IsNotTrivial;

  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
  /// computed based on the uses of the alloca rather than the LLVM type
  /// system.
  enum {
    Unknown,

    // Accesses via GEPs that are consistent with element access of a vector
    // type. This will not be converted into a vector unless there is a later
    // access using an actual vector type.
    ImplicitVector,

    // Accesses via vector operations and GEPs that are consistent with the
    // layout of a vector type.
    Vector,

    // An integer bag-of-bits with bitwise operations for insertion and
    // extraction. Any combination of types can be converted into this kind
    // of scalar.
    Integer
  } ScalarKind;

  /// VectorTy - This tracks the type that we should promote the vector to if
  /// it is possible to turn it into a vector.  This starts out null, and if it
  /// isn't possible to turn into a vector type, it gets set to VoidTy.
  VectorType *VectorTy;

  /// HadNonMemTransferAccess - True if there is at least one access to the
  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
  /// large integers unless there is some potential for optimization.
  bool HadNonMemTransferAccess;

public:
  explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
    : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
      VectorTy(0), HadNonMemTransferAccess(false) { }

  AllocaInst *TryConvert(AllocaInst *AI);

private:
  bool CanConvertToScalar(Value *V, uint64_t Offset);
  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);

  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
                                    uint64_t Offset, IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, IRBuilder<> &Builder);
};
} // end anonymous namespace.


/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
/// alloca if possible or null if not.
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
    return 0;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  // FIXME: It should be possible to promote the vector type up to the alloca's
  // size.
  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                 << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;
    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
      return 0;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0);
  return NewAI;
}

/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector
/// type (VectorTy) so far at the offset specified by Offset (which is
/// specified in bytes).
///
/// There are three cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A union of vector types with power-of-2 size differences, e.g. a
///      float, <2 x float> and <4 x float>.
///      Here we turn element accesses into insert and extract element
///      operations, and <2 x float> accesses into a cast to <2 x double>, an
///      extract, and a cast back to <2 x float>.
///   3) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.  We mark this by setting VectorTy
///      to VoidTy.
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
                                                    uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (ScalarKind == Integer)
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
    if (MergeInVectorType(VInTy, Offset))
      return;
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // Full width accesses can be ignored, because they can always be turned
    // into bitcasts.
    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
    if (EltSize == AllocaSize)
      return;

    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
      if (!VectorTy) {
        ScalarKind = ImplicitVector;
        VectorTy = VectorType::get(In, AllocaSize/EltSize);
        return;
      }

      unsigned CurrentEltSize = VectorTy->getElementType()
                                ->getPrimitiveSizeInBits()/8;
      if (EltSize == CurrentEltSize)
        return;

      if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
        return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  ScalarKind = Integer;
}

/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
                                            uint64_t Offset) {
  // TODO: Support nonzero offsets?
  if (Offset != 0)
    return false;

  // Only allow vectors that are a power-of-2 away from the size of the alloca.
  if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
    return false;

  // If this is the first vector we see, remember the type so that we know the
  // element size.
  if (!VectorTy) {
    ScalarKind = Vector;
    VectorTy = VInTy;
    return true;
  }

  unsigned BitWidth = VectorTy->getBitWidth();
  unsigned InBitWidth = VInTy->getBitWidth();

  // Vectors of the same size can be converted using a simple bitcast.
  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
    ScalarKind = Vector;
    return true;
  }

  Type *ElementTy = VectorTy->getElementType();
  Type *InElementTy = VInTy->getElementType();

  // If they're the same alloc size, we'll be attempting to convert between
  // them with a vector shuffle, which requires the element types to match.
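  // (Illustrative: on a typical target, <3 x i32> and <4 x i32> both have a
  // 16-byte alloc size, so they can be shuffled into one another, while
  // <4 x i32> and <4 x float> are rejected here because their element types
  // differ.)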
  if (TD.getTypeAllocSize(VectorTy) == TD.getTypeAllocSize(VInTy) &&
      ElementTy != InElementTy)
    return false;

  // Do not allow mixed integer and floating-point accesses from vectors of
  // different sizes.
  if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
    return false;

  if (ElementTy->isFloatingPointTy()) {
    // Only allow floating-point vectors of different sizes if they have the
    // same element type.
    // TODO: This could be loosened a bit, but would anything benefit?
    if (ElementTy != InElementTy)
      return false;

    // There are no arbitrary-precision floating-point types, which limits the
    // number of legal vector types with larger element types that we can form
    // to bitcast and extract a subvector.
    // TODO: We could support some more cases with mixed fp128 and double here.
    if (!(BitWidth == 64 || BitWidth == 128) ||
        !(InBitWidth == 64 || InBitWidth == 128))
      return false;
  } else {
    assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
                                       "or floating-point.");
    unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
    unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();

    // Do not allow integer types smaller than a byte or types whose widths are
    // not a multiple of a byte.
    if (BitWidth < 8 || InBitWidth < 8 ||
        BitWidth % 8 != 0 || InBitWidth % 8 != 0)
      return false;
  }

  // Pick the largest of the two vector types.
  ScalarKind = Vector;
  if (InBitWidth > BitWidth)
    VectorTy = VInTy;

  return true;
}

/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.
/// Offset is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value that is of a vector type, set
/// the SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (LI->isVolatile())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
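      // (Illustrative: "getelementptr [4 x i32]* %A, i32 0, i32 %i" uses the
      // runtime index %i, so its byte offset cannot be computed statically and
      // the alloca cannot be rewritten as a single scalar.)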
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;

      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}

/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
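      // (Illustrative, little-endian: rewriting "load i8" at bit offset 16 of
      // an i32-backed alloca produces roughly
      //    %tmp = load i32* %NewAI
      //    %shr = lshr i32 %tmp, 16
      //    %val = trunc i32 %shr to i8
      // via ConvertScalar_ExtractValue below.)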
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                                   ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination are both to the same alloca, then this
      // is a noop copy-to-self, just delete it.  Otherwise, emit a load and
      // store as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));

      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
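        // (Illustrative: "memcpy(%other, %AI, size)" becomes roughly
        //    %srcval = load <ty>* %NewAI
        //    %dst    = bitcast i8* %other to <ty>*
        //    store <ty> %srcval, <ty>* %dst
        // where <ty> is the new alloca's scalar or vector type.)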
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        // There's no need to preserve these, as the resulting alloca will be
        // converted to a register anyways.
        II->eraseFromParent();
        continue;
      }
    }

    llvm_unreachable("Unsupported operation!");
  }
}

/// getScaledElementType - Gets a scaled element type for a partial vector
/// access of an alloca. The input types must be integer or floating-point
/// scalar or vector types, and the resulting type is an integer, float or
/// double.
static Type *getScaledElementType(Type *Ty1, Type *Ty2,
                                  unsigned NewBitWidth) {
  bool IsFP1 = Ty1->isFloatingPointTy() ||
               (Ty1->isVectorTy() &&
                cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
  bool IsFP2 = Ty2->isFloatingPointTy() ||
               (Ty2->isVectorTy() &&
                cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());

  LLVMContext &Context = Ty1->getContext();

  // Prefer floating-point types over integer types, as integer types may have
  // been created by earlier scalar replacement.
  if (IsFP1 || IsFP2) {
    if (NewBitWidth == 32)
      return Type::getFloatTy(Context);
    if (NewBitWidth == 64)
      return Type::getDoubleTy(Context);
  }

  return Type::getIntNTy(Context, NewBitWidth);
}

/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
/// to another vector of the same element type which has the same allocation
/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
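///
/// For example (illustrative), widening a <3 x i32> value %V to <4 x i32>
/// emits:
///   %tmpV = shufflevector <3 x i32> %V, <3 x i32> undef,
///                         <4 x i32> <i32 0, i32 1, i32 2, i32 undef>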
static Value *CreateShuffleVectorCast(Value *FromVal, Type *ToType,
                                      IRBuilder<> &Builder) {
  Type *FromType = FromVal->getType();
  VectorType *FromVTy = cast<VectorType>(FromType);
  VectorType *ToVTy = cast<VectorType>(ToType);
  assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
         "Vectors must have the same element type");
  Value *UnV = UndefValue::get(FromType);
  unsigned numEltsFrom = FromVTy->getNumElements();
  unsigned numEltsTo = ToVTy->getNumElements();

  SmallVector<Constant*, 3> Args;
  Type* Int32Ty = Builder.getInt32Ty();
  unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
  unsigned i;
  for (i=0; i != minNumElts; ++i)
    Args.push_back(ConstantInt::get(Int32Ty, i));

  if (i < numEltsTo) {
    Constant* UnC = UndefValue::get(Int32Ty);
    for (; i != numEltsTo; ++i)
      Args.push_back(UnC);
  }
  Constant *Mask = ConstantVector::get(Args);
  return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
}

/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
                           uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  Type *FromType = FromVal->getType();
  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
    if (FromTypeSize == ToTypeSize) {
      // If the two types have the same primitive size, use a bit cast.
      // Otherwise, they are two vectors with the same element type that have
      // the same allocation size but different numbers of elements, so use
      // a shuffle vector.
      if (FromType->getPrimitiveSizeInBits() ==
          ToType->getPrimitiveSizeInBits())
        return Builder.CreateBitCast(FromVal, ToType, "tmp");
      else
        return CreateShuffleVectorCast(FromVal, ToType, Builder);
    }

    if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
      assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
             "of a smaller vector type at a nonzero offset.");

      Type *CastElementTy = getScaledElementType(FromType, ToType,
                                                 ToTypeSize * 8);
      unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;

      LLVMContext &Context = FromVal->getContext();
      Type *CastTy = VectorType::get(CastElementTy,
                                     NumCastVectorElements);
      Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");

      unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
      unsigned Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
      Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
                                       Type::getInt32Ty(Context), Elt), "tmp");
      return Builder.CreateBitCast(Extract, ToType, "tmp");
    }

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
                        Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType, "tmp");
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                       Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
            TD.getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts (handled with shl), which would
  // otherwise be undefined for the shift instruction.  We do this to support
  // (f.e.) loads off the end of a structure where only some bits are used.
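  // (Illustrative, little-endian: extracting an i16 at bit offset 16 from an
  // i64 bag-of-bits gives ShAmt == 16, i.e. an "lshr i64 %FromVal, 16"
  // followed by the unconditional trunc to i16 below.)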
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                  ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                 -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
      Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                   LIBitWidth), "tmp");

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
                          uint64_t Offset, IRBuilder<> &Builder) {
  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize) {
      // If the two types have the same primitive size, use a bit cast.
      // Otherwise, they are two vectors with the same element type that have
      // the same allocation size but different numbers of elements, so use
      // a shuffle vector.
      if (VTy->getPrimitiveSizeInBits() ==
          SV->getType()->getPrimitiveSizeInBits())
        return Builder.CreateBitCast(SV, AllocaType, "tmp");
      else
        return CreateShuffleVectorCast(SV, VTy, Builder);
    }

    if (isPowerOf2_64(VecSize / ValSize)) {
      assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
             "value of a smaller vector type at a nonzero offset.");

      Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
                                                 ValSize);
      unsigned NumCastVectorElements = VecSize / ValSize;

      LLVMContext &Context = SV->getContext();
      Type *OldCastTy = VectorType::get(CastElementTy,
                                        NumCastVectorElements);
      Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");

      Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");

      unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
      unsigned Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
      Value *Insert =
        Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
                                       Type::getInt32Ty(Context), Elt), "tmp");
      return Builder.CreateBitCast(Insert, AllocaType, "tmp");
    }

    // Must be an element insertion.
    assert(SV->getType() == VTy->getElementType());
    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
    unsigned Elt = Offset/EltSize;
    return Builder.CreateInsertElement(Old, SV,
                    ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                       "tmp");
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV,
                           IntegerType::get(SV->getContext(),SrcWidth), "tmp");
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts (handled with lshr), which would
  // otherwise be undefined for the shift instruction.  We do this to support
  // (f.e.) stores off the end of a structure where only some bits in the
  // structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                                                ShAmt), "tmp");
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                                                 -ShAmt), "tmp");
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}


//===----------------------------------------------------------------------===//
// SRoA Driver
//===----------------------------------------------------------------------===//


bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;  // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;  // No need to re-scalarrepl if no promotion
  }

  return Changed;
}

namespace {
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst *AI;
  DIBuilder *DIB;
  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;
public:
  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                 DIBuilder *DB)
    : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}

  void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
    if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
      for (Value::use_iterator UI = DebugNode->use_begin(),
             E = DebugNode->use_end(); UI != E; ++UI)
        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
          DDIs.push_back(DDI);
        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
          DVIs.push_back(DVI);

    LoadAndStorePromoter::run(Insts);
    AI->eraseFromParent();
    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      DDI->eraseFromParent();
    }
    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      DVI->eraseFromParent();
    }
  }

  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &Insts) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == AI;
    return cast<StoreInst>(I)->getPointerOperand() == AI;
  }

  virtual void updateDebugInfo(Instruction *Inst) const {
    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
    }
    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        Instruction *DbgVal = NULL;
        // If an argument is zero extended then use argument directly. The ZExt
        // may be zapped by an optimization pass in future.
        Argument *ExtendedArg = NULL;
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
        if (ExtendedArg)
          DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
                                                DIVariable(DVI->getVariable()),
                                                SI);
        else
          DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
                                                DIVariable(DVI->getVariable()),
                                                SI);
        DbgVal->setDebugLoc(DVI->getDebugLoc());
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Instruction *DbgVal =
          DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
                                       DIVariable(DVI->getVariable()), LI);
        DbgVal->setDebugLoc(DVI->getDebugLoc());
      }
    }
  }
};
} // end anon namespace

/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers and then
/// select between the result, allowing the load of the alloca to be promoted.
/// From this:
///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   %V2 = load i32* %Other
///   %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
  bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
  bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();

  for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
       UI != UE; ++UI) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI);
    if (LI == 0 || LI->isVolatile()) return false;

    // Both operands to the select need to be dereferencable, either absolutely
    // (e.g. allocas) or at this point because we can see other accesses to it.
    if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
                                                    LI->getAlignment(), TD))
      return false;
    if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
                                                    LI->getAlignment(), TD))
      return false;
  }

  return true;
}

/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers in the
/// pred blocks and then PHI the results, allowing the load of the alloca to
/// be promoted.
/// From this:
///   %P2 = phi [i32* %Alloca, i32* %Other]
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   ...
///   %V2 = load i32* %Other
///   ...
///   %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a PHI if its only uses are loads and if the incoming
/// pointers can be loaded unconditionally.
static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
  // For now, we can only do this promotion if the load is in the same block as
  // the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN->getParent();
  unsigned MaxAlign = 0;
  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
       UI != UE; ++UI) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI);
    if (LI == 0 || LI->isVolatile()) return false;

    // For now we only allow loads in the same block as the PHI.  This is a
    // common case that happens when instcombine merges two loads through a
    // PHI.
    if (LI->getParent() != BB) return false;

    // Ensure that there are no instructions between the PHI and the load that
    // could store.
    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    MaxAlign = std::max(MaxAlign, LI->getAlignment());
  }

  // Okay, we know that we have one or more loads in the same block as the PHI.
  // We can transform this if it is safe to push the loads into the predecessor
  // blocks.  The only thing to watch out for is that we can't put a possibly
  // trapping load in the predecessor if it is a critical edge.
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = PN->getIncomingBlock(i);

    // If the predecessor has a single successor, then the edge isn't critical.
    if (Pred->getTerminator()->getNumSuccessors() == 1)
      continue;

    Value *InVal = PN->getIncomingValue(i);

    // If the InVal is an invoke in the pred, we can't put a load on the edge.
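    // (An invoke is its block's terminator and its result is only available
    // on the normal-destination edge, so there is no point in Pred at which
    // the load could be inserted.)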
    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
      if (II->getParent() == Pred)
        return false;

    // If this pointer is always safe to load, or if we can prove that there
    // is already a load in the block, then we can move the load to the pred
    // block.
    if (InVal->isDereferenceablePointer() ||
        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
      continue;

    return false;
  }

  return true;
}


/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
/// direct (non-volatile) loads and stores to it.  If the alloca is close but
/// not quite there, this will transform the code to allow promotion.  As such,
/// it is a non-pure predicate.
static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
  SetVector<Instruction*, SmallVector<Instruction*, 4>,
            SmallPtrSet<Instruction*, 4> > InstsToRewrite;

  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI) {
    User *U = *UI;
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (LI->isVolatile())
        return false;
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI || SI->isVolatile())
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      continue;
    }

    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
      // If the condition being selected on is a constant, fold the select, yes
      // this does (rarely) happen early on.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
        Value *Result = SI->getOperand(1+CI->isZero());
        SI->replaceAllUsesWith(Result);
        SI->eraseFromParent();

        // This is very rare and we just scrambled the use list of AI, start
        // over completely.
        return tryToMakeAllocaBePromotable(AI, TD);
      }

      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
      // loads, then we can transform this by rewriting the select.
      if (!isSafeSelectToSpeculate(SI, TD))
        return false;

      InstsToRewrite.insert(SI);
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(U)) {
      if (PN->use_empty()) {  // Dead PHIs can be stripped.
        InstsToRewrite.insert(PN);
        continue;
      }

      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
      // in the pred blocks, then we can transform this by rewriting the PHI.
      if (!isSafePHIToSpeculate(PN, TD))
        return false;

      InstsToRewrite.insert(PN);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (onlyUsedByLifetimeMarkers(BCI)) {
        InstsToRewrite.insert(BCI);
        continue;
      }
    }

    return false;
  }

  // If there are no instructions to rewrite, then all uses are load/stores
  // and we're done!
  if (InstsToRewrite.empty())
    return true;

  // If we have instructions that need to be rewritten for this to be
  // promotable, take care of it now.
  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
      // This could only be a bitcast used by nothing but lifetime intrinsics.
      for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
           I != E;) {
        Use &U = I.getUse();
        ++I;
        cast<Instruction>(U.getUser())->eraseFromParent();
      }
      BCI->eraseFromParent();
      continue;
    }

    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
      // loads with a new select.
      while (!SI->use_empty()) {
        LoadInst *LI = cast<LoadInst>(SI->use_back());

        IRBuilder<> Builder(LI);
        LoadInst *TrueLoad =
          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
        LoadInst *FalseLoad =
          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");

        // Transfer alignment and TBAA info if present.
        TrueLoad->setAlignment(LI->getAlignment());
        FalseLoad->setAlignment(LI->getAlignment());
        if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
        }

        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad,
                                        FalseLoad);
        V->takeName(LI);
        LI->replaceAllUsesWith(V);
        LI->eraseFromParent();
      }

      // Now that all the loads are gone, the select is gone too.
      SI->eraseFromParent();
      continue;
    }

    // Otherwise, we have a PHI node which allows us to push the loads into the
    // predecessors.
    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
    if (PN->use_empty()) {
      PN->eraseFromParent();
      continue;
    }

    Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
                                     PN->getName()+".ld", PN);

    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
    // matter which one we get and if any differ, it doesn't matter.
    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
    unsigned Align = SomeLoad->getAlignment();

    // Rewrite all loads of the PN to use the new PHI.
    while (!PN->use_empty()) {
      LoadInst *LI = cast<LoadInst>(PN->use_back());
      LI->replaceAllUsesWith(NewPN);
      LI->eraseFromParent();
    }

    // Inject loads into all of the pred blocks.  Keep track of which blocks
    // we insert them into in case we have multiple edges from the same block.
    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;

    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *Pred = PN->getIncomingBlock(i);
      LoadInst *&Load = InsertedLoads[Pred];
      if (Load == 0) {
        Load = new LoadInst(PN->getIncomingValue(i),
                            PN->getName() + "."
+ Pred->getName(), 1470 Pred->getTerminator()); 1471 Load->setAlignment(Align); 1472 if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); 1473 } 1474 1475 NewPN->addIncoming(Load, Pred); 1476 } 1477 1478 PN->eraseFromParent(); 1479 } 1480 1481 ++NumAdjusted; 1482 return true; 1483} 1484 1485bool SROA::performPromotion(Function &F) { 1486 std::vector<AllocaInst*> Allocas; 1487 DominatorTree *DT = 0; 1488 if (HasDomTree) 1489 DT = &getAnalysis<DominatorTree>(); 1490 1491 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 1492 DIBuilder DIB(*F.getParent()); 1493 bool Changed = false; 1494 SmallVector<Instruction*, 64> Insts; 1495 while (1) { 1496 Allocas.clear(); 1497 1498 // Find allocas that are safe to promote, by looking at all instructions in 1499 // the entry node 1500 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 1501 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 1502 if (tryToMakeAllocaBePromotable(AI, TD)) 1503 Allocas.push_back(AI); 1504 1505 if (Allocas.empty()) break; 1506 1507 if (HasDomTree) 1508 PromoteMemToReg(Allocas, *DT); 1509 else { 1510 SSAUpdater SSA; 1511 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 1512 AllocaInst *AI = Allocas[i]; 1513 1514 // Build list of instructions to promote. 1515 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 1516 UI != E; ++UI) 1517 Insts.push_back(cast<Instruction>(*UI)); 1518 AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); 1519 Insts.clear(); 1520 } 1521 } 1522 NumPromoted += Allocas.size(); 1523 Changed = true; 1524 } 1525 1526 return Changed; 1527} 1528 1529 1530/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for 1531/// SROA. It must be a struct or array type with a small number of elements. 1532static bool ShouldAttemptScalarRepl(AllocaInst *AI) { 1533 Type *T = AI->getAllocatedType(); 1534 // Do not promote any struct into more than 32 separate vars. 1535 if (StructType *ST = dyn_cast<StructType>(T)) 1536 return ST->getNumElements() <= 32; 1537 // Arrays are much less likely to be safe for SROA; only consider 1538 // them if they are very small. 1539 if (ArrayType *AT = dyn_cast<ArrayType>(T)) 1540 return AT->getNumElements() <= 8; 1541 return false; 1542} 1543 1544 1545// performScalarRepl - This algorithm is a simple worklist driven algorithm, 1546// which runs on all of the alloca instructions in the function, removing them 1547// if they are only used by getelementptr instructions. 1548// 1549bool SROA::performScalarRepl(Function &F) { 1550 std::vector<AllocaInst*> WorkList; 1551 1552 // Scan the entry basic block, adding allocas to the worklist. 1553 BasicBlock &BB = F.getEntryBlock(); 1554 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 1555 if (AllocaInst *A = dyn_cast<AllocaInst>(I)) 1556 WorkList.push_back(A); 1557 1558 // Process the worklist 1559 bool Changed = false; 1560 while (!WorkList.empty()) { 1561 AllocaInst *AI = WorkList.back(); 1562 WorkList.pop_back(); 1563 1564 // Handle dead allocas trivially. These can be formed by SROA'ing arrays 1565 // with unused elements. 1566 if (AI->use_empty()) { 1567 AI->eraseFromParent(); 1568 Changed = true; 1569 continue; 1570 } 1571 1572 // If this alloca is impossible for us to promote, reject it early. 1573 if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) 1574 continue; 1575 1576 // Check to see if this allocation is only modified by a memcpy/memmove from 1577 // a constant global. 
If this is the case, we can change all users to use 1578 // the constant global instead. This is commonly produced by the CFE by 1579 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 1580 // is only subsequently read. 1581 SmallVector<Instruction *, 4> ToDelete; 1582 if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { 1583 DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); 1584 DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); 1585 for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) 1586 ToDelete[i]->eraseFromParent(); 1587 Constant *TheSrc = cast<Constant>(Copy->getSource()); 1588 AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); 1589 Copy->eraseFromParent(); // Don't mutate the global. 1590 AI->eraseFromParent(); 1591 ++NumGlobals; 1592 Changed = true; 1593 continue; 1594 } 1595 1596 // Check to see if we can perform the core SROA transformation. We cannot 1597 // transform the allocation instruction if it is an array allocation 1598 // (allocations OF arrays are ok though), and an allocation of a scalar 1599 // value cannot be decomposed at all. 1600 uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); 1601 1602 // Do not promote [0 x %struct]. 1603 if (AllocaSize == 0) continue; 1604 1605 // Do not promote any struct whose size is too big. 1606 if (AllocaSize > SRThreshold) continue; 1607 1608 // If the alloca looks like a good candidate for scalar replacement, and if 1609 // all its users can be transformed, then split up the aggregate into its 1610 // separate elements. 1611 if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { 1612 DoScalarReplacement(AI, WorkList); 1613 Changed = true; 1614 continue; 1615 } 1616 1617 // If we can turn this aggregate value (potentially with casts) into a 1618 // simple scalar value that can be mem2reg'd into a register value. 1619 // IsNotTrivial tracks whether this is something that mem2reg could have 1620 // promoted itself. If so, we don't want to transform it needlessly. Note 1621 // that we can't just check based on the type: the alloca may be of an i32 1622 // but that has pointer arithmetic to set byte 3 of it or something. 1623 if (AllocaInst *NewAI = 1624 ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { 1625 NewAI->takeName(AI); 1626 AI->eraseFromParent(); 1627 ++NumConverted; 1628 Changed = true; 1629 continue; 1630 } 1631 1632 // Otherwise, couldn't process this alloca. 1633 } 1634 1635 return Changed; 1636} 1637 1638/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 1639/// predicate, do SROA now. 1640void SROA::DoScalarReplacement(AllocaInst *AI, 1641 std::vector<AllocaInst*> &WorkList) { 1642 DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); 1643 SmallVector<AllocaInst*, 32> ElementAllocas; 1644 if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 1645 ElementAllocas.reserve(ST->getNumContainedTypes()); 1646 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 1647 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 1648 AI->getAlignment(), 1649 AI->getName() + "." 
+ Twine(i), AI); 1650 ElementAllocas.push_back(NA); 1651 WorkList.push_back(NA); // Add to worklist for recursive processing 1652 } 1653 } else { 1654 ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 1655 ElementAllocas.reserve(AT->getNumElements()); 1656 Type *ElTy = AT->getElementType(); 1657 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 1658 AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), 1659 AI->getName() + "." + Twine(i), AI); 1660 ElementAllocas.push_back(NA); 1661 WorkList.push_back(NA); // Add to worklist for recursive processing 1662 } 1663 } 1664 1665 // Now that we have created the new alloca instructions, rewrite all the 1666 // uses of the old alloca. 1667 RewriteForScalarRepl(AI, AI, 0, ElementAllocas); 1668 1669 // Now erase any instructions that were made dead while rewriting the alloca. 1670 DeleteDeadInstructions(); 1671 AI->eraseFromParent(); 1672 1673 ++NumReplaced; 1674} 1675 1676/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, 1677/// recursively including all their operands that become trivially dead. 1678void SROA::DeleteDeadInstructions() { 1679 while (!DeadInsts.empty()) { 1680 Instruction *I = cast<Instruction>(DeadInsts.pop_back_val()); 1681 1682 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) 1683 if (Instruction *U = dyn_cast<Instruction>(*OI)) { 1684 // Zero out the operand and see if it becomes trivially dead. 1685 // (But, don't add allocas to the dead instruction list -- they are 1686 // already on the worklist and will be deleted separately.) 1687 *OI = 0; 1688 if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) 1689 DeadInsts.push_back(U); 1690 } 1691 1692 I->eraseFromParent(); 1693 } 1694} 1695 1696/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to 1697/// performing scalar replacement of alloca AI. The results are flagged in 1698/// the Info parameter. Offset indicates the position within AI that is 1699/// referenced by this instruction. 
1700void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
1701                               AllocaInfo &Info) {
1702  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1703    Instruction *User = cast<Instruction>(*UI);
1704
1705    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1706      isSafeForScalarRepl(BC, Offset, Info);
1707    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1708      uint64_t GEPOffset = Offset;
1709      isSafeGEP(GEPI, GEPOffset, Info);
1710      if (!Info.isUnsafe)
1711        isSafeForScalarRepl(GEPI, GEPOffset, Info);
1712    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1713      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1714      if (Length == 0)
1715        return MarkUnsafe(Info, User);
1716      isSafeMemAccess(Offset, Length->getZExtValue(), 0,
1717                      UI.getOperandNo() == 0, Info, MI,
1718                      true /*AllowWholeAccess*/);
1719    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1720      if (LI->isVolatile())
1721        return MarkUnsafe(Info, User);
1722      Type *LIType = LI->getType();
1723      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
1724                      LIType, false, Info, LI, true /*AllowWholeAccess*/);
1725      Info.hasALoadOrStore = true;
1726
1727    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1728      // Store is ok if storing INTO the pointer, not storing the pointer
1729      if (SI->isVolatile() || SI->getOperand(0) == I)
1730        return MarkUnsafe(Info, User);
1731
1732      Type *SIType = SI->getOperand(0)->getType();
1733      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
1734                      SIType, true, Info, SI, true /*AllowWholeAccess*/);
1735      Info.hasALoadOrStore = true;
1736    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1737      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1738          II->getIntrinsicID() != Intrinsic::lifetime_end)
1739        return MarkUnsafe(Info, User);
1740    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1741      isSafePHISelectUseForScalarRepl(User, Offset, Info);
1742    } else {
1743      return MarkUnsafe(Info, User);
1744    }
1745    if (Info.isUnsafe) return;
1746  }
1747}
1748
1749
1750/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
1751/// pointer derived from the alloca, we can often still split the alloca into
1752/// elements.  This is useful if we have a large alloca where one element is
1753/// phi'd together somewhere: we can SRoA and promote all the other elements
1754/// even if we end up not being able to promote this one.
1755///
1756/// All we require is that the uses of the PHI do not index into other parts of
1757/// the alloca.  The most important use case for this is single loads and
1758/// stores that are PHI'd together, which can happen due to code sinking.
1759void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
1760                                           AllocaInfo &Info) {
1761  // If we've already checked this PHI, don't do it again.
1762  if (PHINode *PN = dyn_cast<PHINode>(I))
1763    if (!Info.CheckedPHIs.insert(PN))
1764      return;
1765
1766  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1767    Instruction *User = cast<Instruction>(*UI);
1768
1769    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1770      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
1771    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1772      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
1773      // but would have to prove that we're staying inside of an element being
1774      // promoted.
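      // A "bitcast" GEP is one with all-zero indices, e.g. (illustrative IR):
      //   %elt = getelementptr { i32 }* %p, i32 0, i32 0
      // which yields the same address as %p, so Offset is left unchanged.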
1775 if (!GEPI->hasAllZeroIndices()) 1776 return MarkUnsafe(Info, User); 1777 isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); 1778 } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1779 if (LI->isVolatile()) 1780 return MarkUnsafe(Info, User); 1781 Type *LIType = LI->getType(); 1782 isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), 1783 LIType, false, Info, LI, false /*AllowWholeAccess*/); 1784 Info.hasALoadOrStore = true; 1785 1786 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1787 // Store is ok if storing INTO the pointer, not storing the pointer 1788 if (SI->isVolatile() || SI->getOperand(0) == I) 1789 return MarkUnsafe(Info, User); 1790 1791 Type *SIType = SI->getOperand(0)->getType(); 1792 isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), 1793 SIType, true, Info, SI, false /*AllowWholeAccess*/); 1794 Info.hasALoadOrStore = true; 1795 } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { 1796 isSafePHISelectUseForScalarRepl(User, Offset, Info); 1797 } else { 1798 return MarkUnsafe(Info, User); 1799 } 1800 if (Info.isUnsafe) return; 1801 } 1802} 1803 1804/// isSafeGEP - Check if a GEP instruction can be handled for scalar 1805/// replacement. It is safe when all the indices are constant, in-bounds 1806/// references, and when the resulting offset corresponds to an element within 1807/// the alloca type. The results are flagged in the Info parameter. Upon 1808/// return, Offset is adjusted as specified by the GEP indices. 1809void SROA::isSafeGEP(GetElementPtrInst *GEPI, 1810 uint64_t &Offset, AllocaInfo &Info) { 1811 gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); 1812 if (GEPIt == E) 1813 return; 1814 1815 // Walk through the GEP type indices, checking the types that this indexes 1816 // into. 1817 for (; GEPIt != E; ++GEPIt) { 1818 // Ignore struct elements, no extra checking needed for these. 1819 if ((*GEPIt)->isStructTy()) 1820 continue; 1821 1822 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); 1823 if (!IdxVal) 1824 return MarkUnsafe(Info, GEPI); 1825 } 1826 1827 // Compute the offset due to this GEP and check if the alloca has a 1828 // component element at that offset. 1829 SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); 1830 Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); 1831 if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) 1832 MarkUnsafe(Info, GEPI); 1833} 1834 1835/// isHomogeneousAggregate - Check if type T is a struct or array containing 1836/// elements of the same type (which is always true for arrays). If so, 1837/// return true with NumElts and EltTy set to the number of elements and the 1838/// element type, respectively. 1839static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, 1840 Type *&EltTy) { 1841 if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 1842 NumElts = AT->getNumElements(); 1843 EltTy = (NumElts == 0 ? 0 : AT->getElementType()); 1844 return true; 1845 } 1846 if (StructType *ST = dyn_cast<StructType>(T)) { 1847 NumElts = ST->getNumContainedTypes(); 1848 EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); 1849 for (unsigned n = 1; n < NumElts; ++n) { 1850 if (ST->getContainedType(n) != EltTy) 1851 return false; 1852 } 1853 return true; 1854 } 1855 return false; 1856} 1857 1858/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are 1859/// "homogeneous" aggregates with the same element type and number of elements. 
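/// For example (illustrative), { i32, i32 } and [2 x i32] are compatible: both
/// are homogeneous aggregates of two i32 elements.  A struct like
/// { i32, float } is not homogeneous, so it is only compatible with itself.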
1860static bool isCompatibleAggregate(Type *T1, Type *T2) { 1861 if (T1 == T2) 1862 return true; 1863 1864 unsigned NumElts1, NumElts2; 1865 Type *EltTy1, *EltTy2; 1866 if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && 1867 isHomogeneousAggregate(T2, NumElts2, EltTy2) && 1868 NumElts1 == NumElts2 && 1869 EltTy1 == EltTy2) 1870 return true; 1871 1872 return false; 1873} 1874 1875/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI 1876/// alloca or has an offset and size that corresponds to a component element 1877/// within it. The offset checked here may have been formed from a GEP with a 1878/// pointer bitcasted to a different type. 1879/// 1880/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a 1881/// unit. If false, it only allows accesses known to be in a single element. 1882void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, 1883 Type *MemOpType, bool isStore, 1884 AllocaInfo &Info, Instruction *TheAccess, 1885 bool AllowWholeAccess) { 1886 // Check if this is a load/store of the entire alloca. 1887 if (Offset == 0 && AllowWholeAccess && 1888 MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) { 1889 // This can be safe for MemIntrinsics (where MemOpType is 0) and integer 1890 // loads/stores (which are essentially the same as the MemIntrinsics with 1891 // regard to copying padding between elements). But, if an alloca is 1892 // flagged as both a source and destination of such operations, we'll need 1893 // to check later for padding between elements. 1894 if (!MemOpType || MemOpType->isIntegerTy()) { 1895 if (isStore) 1896 Info.isMemCpyDst = true; 1897 else 1898 Info.isMemCpySrc = true; 1899 return; 1900 } 1901 // This is also safe for references using a type that is compatible with 1902 // the type of the alloca, so that loads/stores can be rewritten using 1903 // insertvalue/extractvalue. 1904 if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { 1905 Info.hasSubelementAccess = true; 1906 return; 1907 } 1908 } 1909 // Check if the offset/size correspond to a component within the alloca type. 1910 Type *T = Info.AI->getAllocatedType(); 1911 if (TypeHasComponent(T, Offset, MemSize)) { 1912 Info.hasSubelementAccess = true; 1913 return; 1914 } 1915 1916 return MarkUnsafe(Info, TheAccess); 1917} 1918 1919/// TypeHasComponent - Return true if T has a component type with the 1920/// specified offset and size. If Size is zero, do not check the size. 1921bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { 1922 Type *EltTy; 1923 uint64_t EltSize; 1924 if (StructType *ST = dyn_cast<StructType>(T)) { 1925 const StructLayout *Layout = TD->getStructLayout(ST); 1926 unsigned EltIdx = Layout->getElementContainingOffset(Offset); 1927 EltTy = ST->getContainedType(EltIdx); 1928 EltSize = TD->getTypeAllocSize(EltTy); 1929 Offset -= Layout->getElementOffset(EltIdx); 1930 } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 1931 EltTy = AT->getElementType(); 1932 EltSize = TD->getTypeAllocSize(EltTy); 1933 if (Offset >= AT->getNumElements() * EltSize) 1934 return false; 1935 Offset %= EltSize; 1936 } else { 1937 return false; 1938 } 1939 if (Offset == 0 && (Size == 0 || EltSize == Size)) 1940 return true; 1941 // Check if the component spans multiple elements. 
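  // For example (illustrative, assuming a typical layout): in
  // { i32, [4 x i8] }, an access at Offset 5 with Size 2 descends into the
  // array, where a 2-byte access cannot fit in a single i8 element, so this
  // check fails in the recursive call.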
1942  if (Offset + Size > EltSize)
1943    return false;
1944  return TypeHasComponent(EltTy, Offset, Size);
1945}
1946
1947/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
1948/// the instruction I, which references it, to use the separate elements.
1949/// Offset indicates the position within AI that is referenced by this
1950/// instruction.
1951void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
1952                                SmallVector<AllocaInst*, 32> &NewElts) {
1953  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
1954    Use &TheUse = UI.getUse();
1955    Instruction *User = cast<Instruction>(*UI++);
1956
1957    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1958      RewriteBitCast(BC, AI, Offset, NewElts);
1959      continue;
1960    }
1961
1962    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1963      RewriteGEP(GEPI, AI, Offset, NewElts);
1964      continue;
1965    }
1966
1967    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1968      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1969      uint64_t MemSize = Length->getZExtValue();
1970      if (Offset == 0 &&
1971          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
1972        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
1973      // Otherwise the intrinsic can only touch a single element and the
1974      // address operand will be updated, so nothing else needs to be done.
1975      continue;
1976    }
1977
1978    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1979      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
1980          II->getIntrinsicID() == Intrinsic::lifetime_end) {
1981        RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
1982      }
1983      continue;
1984    }
1985
1986    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1987      Type *LIType = LI->getType();
1988
1989      if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
1990        // Replace:
1991        //   %res = load { i32, i32 }* %alloc
1992        // with:
1993        //   %load.0 = load i32* %alloc.0
1994        //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
1995        //   %load.1 = load i32* %alloc.1
1996        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
1997        // (Also works for arrays instead of structs)
1998        Value *Insert = UndefValue::get(LIType);
1999        IRBuilder<> Builder(LI);
2000        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2001          Value *Load = Builder.CreateLoad(NewElts[i], "load");
2002          Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
2003        }
2004        LI->replaceAllUsesWith(Insert);
2005        DeadInsts.push_back(LI);
2006      } else if (LIType->isIntegerTy() &&
2007                 TD->getTypeAllocSize(LIType) ==
2008                 TD->getTypeAllocSize(AI->getAllocatedType())) {
2009        // If this is a load of the entire alloca to an integer, rewrite it.
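        // For example (illustrative), %v = load i64* %p, where %p is a
        // { i32, i32 } alloca bitcast to i64*, is rebuilt from the element
        // loads with zext/shl/or.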
2010 RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); 2011 } 2012 continue; 2013 } 2014 2015 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 2016 Value *Val = SI->getOperand(0); 2017 Type *SIType = Val->getType(); 2018 if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { 2019 // Replace: 2020 // store { i32, i32 } %val, { i32, i32 }* %alloc 2021 // with: 2022 // %val.0 = extractvalue { i32, i32 } %val, 0 2023 // store i32 %val.0, i32* %alloc.0 2024 // %val.1 = extractvalue { i32, i32 } %val, 1 2025 // store i32 %val.1, i32* %alloc.1 2026 // (Also works for arrays instead of structs) 2027 IRBuilder<> Builder(SI); 2028 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2029 Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); 2030 Builder.CreateStore(Extract, NewElts[i]); 2031 } 2032 DeadInsts.push_back(SI); 2033 } else if (SIType->isIntegerTy() && 2034 TD->getTypeAllocSize(SIType) == 2035 TD->getTypeAllocSize(AI->getAllocatedType())) { 2036 // If this is a store of the entire alloca from an integer, rewrite it. 2037 RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); 2038 } 2039 continue; 2040 } 2041 2042 if (isa<SelectInst>(User) || isa<PHINode>(User)) { 2043 // If we have a PHI user of the alloca itself (as opposed to a GEP or 2044 // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to 2045 // the new pointer. 2046 if (!isa<AllocaInst>(I)) continue; 2047 2048 assert(Offset == 0 && NewElts[0] && 2049 "Direct alloca use should have a zero offset"); 2050 2051 // If we have a use of the alloca, we know the derived uses will be 2052 // utilizing just the first element of the scalarized result. Insert a 2053 // bitcast of the first alloca before the user as required. 2054 AllocaInst *NewAI = NewElts[0]; 2055 BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI); 2056 NewAI->moveBefore(BCI); 2057 TheUse = BCI; 2058 continue; 2059 } 2060 } 2061} 2062 2063/// RewriteBitCast - Update a bitcast reference to the alloca being replaced 2064/// and recursively continue updating all of its uses. 2065void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, 2066 SmallVector<AllocaInst*, 32> &NewElts) { 2067 RewriteForScalarRepl(BC, AI, Offset, NewElts); 2068 if (BC->getOperand(0) != AI) 2069 return; 2070 2071 // The bitcast references the original alloca. Replace its uses with 2072 // references to the first new element alloca. 2073 Instruction *Val = NewElts[0]; 2074 if (Val->getType() != BC->getDestTy()) { 2075 Val = new BitCastInst(Val, BC->getDestTy(), "", BC); 2076 Val->takeName(BC); 2077 } 2078 BC->replaceAllUsesWith(Val); 2079 DeadInsts.push_back(BC); 2080} 2081 2082/// FindElementAndOffset - Return the index of the element containing Offset 2083/// within the specified type, which must be either a struct or an array. 2084/// Sets T to the type of the element and Offset to the offset within that 2085/// element. IdxTy is set to the type of the index result to be used in a 2086/// GEP instruction. 
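/// For example (illustrative, assuming a typical layout where i32 occupies
/// 4 bytes): with T = { i32, [4 x i8] } and Offset = 6, this returns 1 and
/// leaves T = [4 x i8], Offset = 2, and IdxTy = i32.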
2087uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
2088                                    Type *&IdxTy) {
2089  uint64_t Idx = 0;
2090  if (StructType *ST = dyn_cast<StructType>(T)) {
2091    const StructLayout *Layout = TD->getStructLayout(ST);
2092    Idx = Layout->getElementContainingOffset(Offset);
2093    T = ST->getContainedType(Idx);
2094    Offset -= Layout->getElementOffset(Idx);
2095    IdxTy = Type::getInt32Ty(T->getContext());
2096    return Idx;
2097  }
2098  ArrayType *AT = cast<ArrayType>(T);
2099  T = AT->getElementType();
2100  uint64_t EltSize = TD->getTypeAllocSize(T);
2101  Idx = Offset / EltSize;
2102  Offset -= Idx * EltSize;
2103  IdxTy = Type::getInt64Ty(T->getContext());
2104  return Idx;
2105}
2106
2107/// RewriteGEP - Check if this GEP instruction moves the pointer across
2108/// elements of the alloca that are being split apart, and if so, rewrite
2109/// the GEP to be relative to the new element.
2110void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
2111                      SmallVector<AllocaInst*, 32> &NewElts) {
2112  uint64_t OldOffset = Offset;
2113  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
2114  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
2115
2116  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
2117
2118  Type *T = AI->getAllocatedType();
2119  Type *IdxTy;
2120  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
2121  if (GEPI->getOperand(0) == AI)
2122    OldIdx = ~0ULL; // Force the GEP to be rewritten.
2123
2124  T = AI->getAllocatedType();
2125  uint64_t EltOffset = Offset;
2126  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
2127
2128  // If this GEP does not move the pointer across elements of the alloca
2129  // being split, then it does not need to be rewritten.
2130  if (Idx == OldIdx)
2131    return;
2132
2133  Type *i32Ty = Type::getInt32Ty(AI->getContext());
2134  SmallVector<Value*, 8> NewArgs;
2135  NewArgs.push_back(Constant::getNullValue(i32Ty));
2136  while (EltOffset != 0) {
2137    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
2138    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
2139  }
2140  Instruction *Val = NewElts[Idx];
2141  if (NewArgs.size() > 1) {
2142    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
2143    Val->takeName(GEPI);
2144  }
2145  if (Val->getType() != GEPI->getType())
2146    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
2147  GEPI->replaceAllUsesWith(Val);
2148  DeadInsts.push_back(GEPI);
2149}
2150
2151/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
2152/// to mark the lifetime of the scalarized memory.
2153void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
2154                                    uint64_t Offset,
2155                                    SmallVector<AllocaInst*, 32> &NewElts) {
2156  ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
2157  // Put matching lifetime markers on everything from Offset up to
2158  // Offset+OldSize.
2159  Type *AIType = AI->getAllocatedType();
2160  uint64_t NewOffset = Offset;
2161  Type *IdxTy;
2162  uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
2163
2164  IRBuilder<> Builder(II);
2165  uint64_t Size = OldSize->getLimitedValue();
2166
2167  if (NewOffset) {
2168    // Splice the first element and index 'NewOffset' bytes in.  SROA will
2169    // split the alloca again later.
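    // For example (illustrative), a 6-byte marker starting at byte 2 of a
    // { i32, i32 } alloca gets a 2-byte marker at byte 2 of element 0 here,
    // and a 4-byte marker covering element 1 from the loop below.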
2170    Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
2171    V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
2172
2173    IdxTy = NewElts[Idx]->getAllocatedType();
2174    uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
2175    if (EltSize > Size) {
2176      EltSize = Size;
2177      Size = 0;
2178    } else {
2179      Size -= EltSize;
2180    }
2181    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
2182      Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
2183    else
2184      Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
2185    ++Idx;
2186  }
2187
2188  for (; Idx != NewElts.size() && Size; ++Idx) {
2189    IdxTy = NewElts[Idx]->getAllocatedType();
2190    uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
2191    if (EltSize > Size) {
2192      EltSize = Size;
2193      Size = 0;
2194    } else {
2195      Size -= EltSize;
2196    }
2197    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
2198      Builder.CreateLifetimeStart(NewElts[Idx],
2199                                  Builder.getInt64(EltSize));
2200    else
2201      Builder.CreateLifetimeEnd(NewElts[Idx],
2202                                Builder.getInt64(EltSize));
2203  }
2204  DeadInsts.push_back(II);
2205}
2206
2207/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
2208/// Rewrite it to copy or set the elements of the scalarized memory.
2209void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
2210                                        AllocaInst *AI,
2211                                        SmallVector<AllocaInst*, 32> &NewElts) {
2212  // If this is a memcpy/memmove, construct the other pointer as the
2213  // appropriate type.  The "Other" pointer is the pointer that goes to memory
2214  // that doesn't have anything to do with the alloca that we are promoting.
2215  // For memset, this Value* stays null.
2216  Value *OtherPtr = 0;
2217  unsigned MemAlignment = MI->getAlignment();
2218  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
2219    if (Inst == MTI->getRawDest())
2220      OtherPtr = MTI->getRawSource();
2221    else {
2222      assert(Inst == MTI->getRawSource());
2223      OtherPtr = MTI->getRawDest();
2224    }
2225  }
2226
2227  // If there is an other pointer, we want to convert it to the same pointer
2228  // type as AI has, so we can GEP through it safely.
2229  if (OtherPtr) {
2230    unsigned AddrSpace =
2231      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
2232
2233    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
2234    // optimization, but it's also required to detect the corner case where
2235    // both pointer operands are referencing the same memory, and where
2236    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
2237    // (This function is only called for mem intrinsics that access the whole
2238    // aggregate, so non-zero GEPs are not an issue here.)
2239    OtherPtr = OtherPtr->stripPointerCasts();
2240
2241    // Copying the alloca to itself is a no-op: just delete it.
2242    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
2243      // This code will run twice for a no-op memcpy -- once for each operand.
2244      // Put only one reference to MI on the DeadInsts list.
2245      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
2246           E = DeadInsts.end(); I != E; ++I)
2247        if (*I == MI) return;
2248      DeadInsts.push_back(MI);
2249      return;
2250    }
2251
2252    // If the pointer is not the right type, insert a bitcast to the right
2253    // type.
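    // For example (illustrative), an i8* operand is rewritten here to
    // { i32, i32 }* so that the per-element GEPs emitted below are well typed.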
2254    Type *NewTy =
2255      PointerType::get(AI->getType()->getElementType(), AddrSpace);
2256
2257    if (OtherPtr->getType() != NewTy)
2258      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
2259  }
2260
2261  // Process each element of the aggregate.
2262  bool SROADest = MI->getRawDest() == Inst;
2263
2264  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
2265
2266  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2267    // If this is a memcpy/memmove, emit a GEP of the other element address.
2268    Value *OtherElt = 0;
2269    unsigned OtherEltAlign = MemAlignment;
2270
2271    if (OtherPtr) {
2272      Value *Idx[2] = { Zero,
2273                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
2274      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
2275                                              OtherPtr->getName()+"."+Twine(i),
2276                                                   MI);
2277      uint64_t EltOffset;
2278      PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
2279      Type *OtherTy = OtherPtrTy->getElementType();
2280      if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
2281        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
2282      } else {
2283        Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
2284        EltOffset = TD->getTypeAllocSize(EltTy)*i;
2285      }
2286
2287      // The alignment of the other pointer is the guaranteed alignment of the
2288      // element, which is affected by both the known alignment of the whole
2289      // mem intrinsic and the alignment of the element.  If, for example, the
2290      // memcpy has 32-byte alignment but the element is at a 4-byte offset,
2291      // then the known alignment is just 4 bytes.
2292      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
2293    }
2294
2295    Value *EltPtr = NewElts[i];
2296    Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
2297
2298    // If we got down to a scalar, insert a load or store as appropriate.
2299    if (EltTy->isSingleValueType()) {
2300      if (isa<MemTransferInst>(MI)) {
2301        if (SROADest) {
2302          // From Other to Alloca.
2303          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
2304          new StoreInst(Elt, EltPtr, MI);
2305        } else {
2306          // From Alloca to Other.
2307          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
2308          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
2309        }
2310        continue;
2311      }
2312      assert(isa<MemSetInst>(MI));
2313
2314      // If the stored element is zero (common case), just store a null
2315      // constant.
2316      Constant *StoreVal;
2317      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
2318        if (CI->isZero()) {
2319          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
2320        } else {
2321          // If EltTy is a vector type, get the element type.
2322          Type *ValTy = EltTy->getScalarType();
2323
2324          // Construct an integer with the right value.
2325          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
2326          APInt OneVal(EltSize, CI->getZExtValue());
2327          APInt TotalVal(OneVal);
2328          // Set each byte.
2329          for (unsigned i = 0; 8*i < EltSize; ++i) {
2330            TotalVal = TotalVal.shl(8);
2331            TotalVal |= OneVal;
2332          }
2333
2334          // Convert the integer value to the appropriate type.
2335          StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
2336          if (ValTy->isPointerTy())
2337            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
2338          else if (ValTy->isFloatingPointTy())
2339            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
2340          assert(StoreVal->getType() == ValTy && "Type mismatch!");
2341
2342          // If the requested value was a vector constant, create it.
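          // For example (illustrative), a memset byte of 0xAB becomes the
          // i32 constant 0xABABABAB above; for a <4 x i8> element the scalar
          // is splatted across all four lanes below.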
2343 if (EltTy != ValTy) { 2344 unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); 2345 SmallVector<Constant*, 16> Elts(NumElts, StoreVal); 2346 StoreVal = ConstantVector::get(Elts); 2347 } 2348 } 2349 new StoreInst(StoreVal, EltPtr, MI); 2350 continue; 2351 } 2352 // Otherwise, if we're storing a byte variable, use a memset call for 2353 // this element. 2354 } 2355 2356 unsigned EltSize = TD->getTypeAllocSize(EltTy); 2357 2358 IRBuilder<> Builder(MI); 2359 2360 // Finally, insert the meminst for this element. 2361 if (isa<MemSetInst>(MI)) { 2362 Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize, 2363 MI->isVolatile()); 2364 } else { 2365 assert(isa<MemTransferInst>(MI)); 2366 Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr 2367 Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr 2368 2369 if (isa<MemCpyInst>(MI)) 2370 Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); 2371 else 2372 Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); 2373 } 2374 } 2375 DeadInsts.push_back(MI); 2376} 2377 2378/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that 2379/// overwrites the entire allocation. Extract out the pieces of the stored 2380/// integer and store them individually. 2381void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, 2382 SmallVector<AllocaInst*, 32> &NewElts){ 2383 // Extract each element out of the integer according to its structure offset 2384 // and store the element value to the individual alloca. 2385 Value *SrcVal = SI->getOperand(0); 2386 Type *AllocaEltTy = AI->getAllocatedType(); 2387 uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); 2388 2389 IRBuilder<> Builder(SI); 2390 2391 // Handle tail padding by extending the operand 2392 if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) 2393 SrcVal = Builder.CreateZExt(SrcVal, 2394 IntegerType::get(SI->getContext(), AllocaSizeBits)); 2395 2396 DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI 2397 << '\n'); 2398 2399 // There are two forms here: AI could be an array or struct. Both cases 2400 // have different ways to compute the element offset. 2401 if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 2402 const StructLayout *Layout = TD->getStructLayout(EltSTy); 2403 2404 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2405 // Get the number of bits to shift SrcVal to get the value. 2406 Type *FieldTy = EltSTy->getElementType(i); 2407 uint64_t Shift = Layout->getElementOffsetInBits(i); 2408 2409 if (TD->isBigEndian()) 2410 Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); 2411 2412 Value *EltVal = SrcVal; 2413 if (Shift) { 2414 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 2415 EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); 2416 } 2417 2418 // Truncate down to an integer of the right size. 2419 uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 2420 2421 // Ignore zero sized fields like {}, they obviously contain no data. 2422 if (FieldSizeBits == 0) continue; 2423 2424 if (FieldSizeBits != AllocaSizeBits) 2425 EltVal = Builder.CreateTrunc(EltVal, 2426 IntegerType::get(SI->getContext(), FieldSizeBits)); 2427 Value *DestField = NewElts[i]; 2428 if (EltVal->getType() == FieldTy) { 2429 // Storing to an integer field of this size, just do it. 2430 } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { 2431 // Bitcast to the right element type (for fp/vector values). 
2432 EltVal = Builder.CreateBitCast(EltVal, FieldTy); 2433 } else { 2434 // Otherwise, bitcast the dest pointer (for aggregates). 2435 DestField = Builder.CreateBitCast(DestField, 2436 PointerType::getUnqual(EltVal->getType())); 2437 } 2438 new StoreInst(EltVal, DestField, SI); 2439 } 2440 2441 } else { 2442 ArrayType *ATy = cast<ArrayType>(AllocaEltTy); 2443 Type *ArrayEltTy = ATy->getElementType(); 2444 uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); 2445 uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); 2446 2447 uint64_t Shift; 2448 2449 if (TD->isBigEndian()) 2450 Shift = AllocaSizeBits-ElementOffset; 2451 else 2452 Shift = 0; 2453 2454 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2455 // Ignore zero sized fields like {}, they obviously contain no data. 2456 if (ElementSizeBits == 0) continue; 2457 2458 Value *EltVal = SrcVal; 2459 if (Shift) { 2460 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 2461 EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); 2462 } 2463 2464 // Truncate down to an integer of the right size. 2465 if (ElementSizeBits != AllocaSizeBits) 2466 EltVal = Builder.CreateTrunc(EltVal, 2467 IntegerType::get(SI->getContext(), 2468 ElementSizeBits)); 2469 Value *DestField = NewElts[i]; 2470 if (EltVal->getType() == ArrayEltTy) { 2471 // Storing to an integer field of this size, just do it. 2472 } else if (ArrayEltTy->isFloatingPointTy() || 2473 ArrayEltTy->isVectorTy()) { 2474 // Bitcast to the right element type (for fp/vector values). 2475 EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy); 2476 } else { 2477 // Otherwise, bitcast the dest pointer (for aggregates). 2478 DestField = Builder.CreateBitCast(DestField, 2479 PointerType::getUnqual(EltVal->getType())); 2480 } 2481 new StoreInst(EltVal, DestField, SI); 2482 2483 if (TD->isBigEndian()) 2484 Shift -= ElementOffset; 2485 else 2486 Shift += ElementOffset; 2487 } 2488 } 2489 2490 DeadInsts.push_back(SI); 2491} 2492 2493/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to 2494/// an integer. Load the individual pieces to form the aggregate value. 2495void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, 2496 SmallVector<AllocaInst*, 32> &NewElts) { 2497 // Extract each element out of the NewElts according to its structure offset 2498 // and form the result value. 2499 Type *AllocaEltTy = AI->getAllocatedType(); 2500 uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); 2501 2502 DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI 2503 << '\n'); 2504 2505 // There are two forms here: AI could be an array or struct. Both cases 2506 // have different ways to compute the element offset. 2507 const StructLayout *Layout = 0; 2508 uint64_t ArrayEltBitOffset = 0; 2509 if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 2510 Layout = TD->getStructLayout(EltSTy); 2511 } else { 2512 Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); 2513 ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); 2514 } 2515 2516 Value *ResultVal = 2517 Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); 2518 2519 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2520 // Load the value from the alloca. If the NewElt is an aggregate, cast 2521 // the pointer to an integer of the same size before doing the load. 
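    // For example (illustrative, little-endian): loading { i32, i32 } as i64,
    // element 0 occupies bits [0, 32) of the result and element 1 is shifted
    // into bits [32, 64) before being or'd in.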
2522 Value *SrcField = NewElts[i]; 2523 Type *FieldTy = 2524 cast<PointerType>(SrcField->getType())->getElementType(); 2525 uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 2526 2527 // Ignore zero sized fields like {}, they obviously contain no data. 2528 if (FieldSizeBits == 0) continue; 2529 2530 IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), 2531 FieldSizeBits); 2532 if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && 2533 !FieldTy->isVectorTy()) 2534 SrcField = new BitCastInst(SrcField, 2535 PointerType::getUnqual(FieldIntTy), 2536 "", LI); 2537 SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); 2538 2539 // If SrcField is a fp or vector of the right size but that isn't an 2540 // integer type, bitcast to an integer so we can shift it. 2541 if (SrcField->getType() != FieldIntTy) 2542 SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); 2543 2544 // Zero extend the field to be the same size as the final alloca so that 2545 // we can shift and insert it. 2546 if (SrcField->getType() != ResultVal->getType()) 2547 SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); 2548 2549 // Determine the number of bits to shift SrcField. 2550 uint64_t Shift; 2551 if (Layout) // Struct case. 2552 Shift = Layout->getElementOffsetInBits(i); 2553 else // Array case. 2554 Shift = i*ArrayEltBitOffset; 2555 2556 if (TD->isBigEndian()) 2557 Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); 2558 2559 if (Shift) { 2560 Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); 2561 SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); 2562 } 2563 2564 // Don't create an 'or x, 0' on the first iteration. 2565 if (!isa<Constant>(ResultVal) || 2566 !cast<Constant>(ResultVal)->isNullValue()) 2567 ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); 2568 else 2569 ResultVal = SrcField; 2570 } 2571 2572 // Handle tail padding by truncating the result 2573 if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) 2574 ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); 2575 2576 LI->replaceAllUsesWith(ResultVal); 2577 DeadInsts.push_back(LI); 2578} 2579 2580/// HasPadding - Return true if the specified type has any structure or 2581/// alignment padding in between the elements that would be split apart 2582/// by SROA; return false otherwise. 2583static bool HasPadding(Type *Ty, const TargetData &TD) { 2584 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 2585 Ty = ATy->getElementType(); 2586 return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); 2587 } 2588 2589 // SROA currently handles only Arrays and Structs. 2590 StructType *STy = cast<StructType>(Ty); 2591 const StructLayout *SL = TD.getStructLayout(STy); 2592 unsigned PrevFieldBitOffset = 0; 2593 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 2594 unsigned FieldBitOffset = SL->getElementOffsetInBits(i); 2595 2596 // Check to see if there is any padding between this element and the 2597 // previous one. 2598 if (i) { 2599 unsigned PrevFieldEnd = 2600 PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); 2601 if (PrevFieldEnd < FieldBitOffset) 2602 return true; 2603 } 2604 PrevFieldBitOffset = FieldBitOffset; 2605 } 2606 // Check for tail padding. 
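  // For example (illustrative, assuming i32 is 4-byte aligned), { i32, i8 }
  // occupies 8 bytes but its fields end at byte 5, so the check below sees
  // 3 bytes of tail padding.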
2607  if (unsigned EltCount = STy->getNumElements()) {
2608    unsigned PrevFieldEnd = PrevFieldBitOffset +
2609      TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
2610    if (PrevFieldEnd < SL->getSizeInBits())
2611      return true;
2612  }
2613  return false;
2614}
2615
2616/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
2617/// aggregate can be broken down into elements.  Return false if not, or true
2618/// if it is safe to perform scalar replacement.
2619bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
2620  // Loop over the use list of the alloca.  We can only transform it if all of
2621  // the users are safe to transform.
2622  AllocaInfo Info(AI);
2623
2624  isSafeForScalarRepl(AI, 0, Info);
2625  if (Info.isUnsafe) {
2626    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
2627    return false;
2628  }
2629
2630  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
2631  // source and destination, we have to be careful.  In particular, the memcpy
2632  // could be moving around elements that live in structure padding of the LLVM
2633  // types, but may actually be used.  In these cases, we refuse to promote the
2634  // struct.
2635  if (Info.isMemCpySrc && Info.isMemCpyDst &&
2636      HasPadding(AI->getAllocatedType(), *TD))
2637    return false;

2638
2639  // If the alloca never has an access to just *part* of it, but is accessed
2640  // via loads and stores, then we should use ConvertToScalarInfo to promote
2641  // the alloca instead of promoting each piece at a time and inserting fission
2642  // and fusion code.
2643  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
2644    // If the struct/array just has one element, use basic SRoA.
2645    if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
2646      if (ST->getNumElements() > 1) return false;
2647    } else {
2648      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
2649        return false;
2650    }
2651  }
2652
2653  return true;
2654}
2655
2656
2657
2658/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
2659/// some part of a constant global variable.  This intentionally only accepts
2660/// constant expressions because we can't rewrite arbitrary instructions.
2661static bool PointsToConstantGlobal(Value *V) {
2662  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
2663    return GV->isConstant();
2664  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
2665    if (CE->getOpcode() == Instruction::BitCast ||
2666        CE->getOpcode() == Instruction::GetElementPtr)
2667      return PointsToConstantGlobal(CE->getOperand(0));
2668  return false;
2669}
2670
2671/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
2672/// pointer to an alloca.  Ignore any reads of the pointer; return false if we
2673/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
2674/// track of whether it moves the pointer (with isOffset), but otherwise
2675/// traverse the uses.  If we see a memcpy/memmove that targets a pointer that
2676/// has not been offset from the start of the alloca, and if the source pointer
2677/// is a pointer to a constant global, we can optimize this.
2678static bool
2679isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
2680                               bool isOffset,
2681                               SmallVector<Instruction *, 4> &LifetimeMarkers) {
2682  // We track lifetime intrinsics as we encounter them.  If we decide to go
2683  // ahead and replace the value with the global, this lets the caller quickly
2684  // eliminate the markers.
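  //
  // The pattern being matched looks like this (illustrative IR):
  //   %A = alloca [4 x i32]
  //   %p = bitcast [4 x i32]* %A to i8*
  //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* bitcast (... @G ...), ...)
  //   ... only reads of %A ...
  // where @G is a constant global; the caller can then replace %A with @G.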
2685
2686  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
2687    User *U = cast<Instruction>(*UI);
2688
2689    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
2690      // Ignore non-volatile loads; they are always ok.
2691      if (LI->isVolatile()) return false;
2692      continue;
2693    }
2694
2695    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
2696      // If uses of the bitcast are ok, we are ok.
2697      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
2698                                          LifetimeMarkers))
2699        return false;
2700      continue;
2701    }
2702    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
2703      // If the GEP has all zero indices, it doesn't offset the pointer;
2704      // otherwise it does.
2705      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
2706                                          isOffset || !GEP->hasAllZeroIndices(),
2707                                          LifetimeMarkers))
2708        return false;
2709      continue;
2710    }
2711
2712    if (CallSite CS = U) {
2713      // If this is the function being called then we treat it like a load and
2714      // ignore it.
2715      if (CS.isCallee(UI))
2716        continue;
2717
2718      // If this is a readonly/readnone call site, then we know it is just a
2719      // load (but one that potentially returns the value itself), so we can
2720      // ignore it if we know that the value isn't captured.
2721      unsigned ArgNo = CS.getArgumentNo(UI);
2722      if (CS.onlyReadsMemory() &&
2723          (CS.getInstruction()->use_empty() ||
2724           CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
2725        continue;
2726
2727      // If this is being passed as a byval argument, the caller is making a
2728      // copy, so it is only a read of the alloca.
2729      if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
2730        continue;
2731    }
2732
2733    // Lifetime intrinsics can be handled by the caller.
2734    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
2735      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
2736          II->getIntrinsicID() == Intrinsic::lifetime_end) {
2737        assert(II->use_empty() && "Lifetime markers have no result to use!");
2738        LifetimeMarkers.push_back(II);
2739        continue;
2740      }
2741    }
2742
2743    // If this isn't our memcpy/memmove, reject it as something we can't
2744    // handle.
2745    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
2746    if (MI == 0)
2747      return false;
2748
2749    // If the transfer is using the alloca as a source, ignore it since it is
2750    // only a read (unless the transfer is volatile).
2751    if (UI.getOperandNo() == 1) {
2752      if (MI->isVolatile()) return false;
2753      continue;
2754    }
2755
2756    // If we already have seen a copy, reject the second one.
2757    if (TheCopy) return false;
2758
2759    // If the pointer has been offset from the start of the alloca, we can't
2760    // safely handle this.
2761    if (isOffset) return false;
2762
2763    // If the memintrinsic isn't using the alloca as the dest, reject it.
2764    if (UI.getOperandNo() != 0) return false;
2765
2766    // If the source of the memcpy/move is not a constant global, reject it.
2767    if (!PointsToConstantGlobal(MI->getSource()))
2768      return false;
2769
2770    // Otherwise, the transform is safe.  Remember the copy instruction.
2771    TheCopy = MI;
2772  }
2773  return true;
2774}
2775
2776/// isOnlyCopiedFromConstantGlobal - Return the memcpy/memmove if the specified
2777/// alloca is only modified by a copy from a constant global.  If we can prove
2778/// this, we can replace any uses of the alloca with uses of the global directly.
2779MemTransferInst * 2780SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI, 2781 SmallVector<Instruction*, 4> &ToDelete) { 2782 MemTransferInst *TheCopy = 0; 2783 if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete)) 2784 return TheCopy; 2785 return 0; 2786} 2787