ScalarReplAggregates.cpp revision 3d730f7453af5ecb1be4b8c5d48843ad5637de37
1//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This transformation implements the well known scalar replacement of 11// aggregates transformation. This xform breaks up alloca instructions of 12// aggregate type (structure or array) into individual alloca instructions for 13// each member (if possible). Then, if possible, it transforms the individual 14// alloca instructions into nice clean scalar SSA form. 15// 16// This combines a simple SRoA algorithm with the Mem2Reg algorithm because 17// often interact, especially for C++ programs. As such, iterating between 18// SRoA, then Mem2Reg until we run out of things to promote works well. 19// 20//===----------------------------------------------------------------------===// 21 22#define DEBUG_TYPE "scalarrepl" 23#include "llvm/Transforms/Scalar.h" 24#include "llvm/Constants.h" 25#include "llvm/DerivedTypes.h" 26#include "llvm/Function.h" 27#include "llvm/GlobalVariable.h" 28#include "llvm/Instructions.h" 29#include "llvm/IntrinsicInst.h" 30#include "llvm/Pass.h" 31#include "llvm/Analysis/Dominators.h" 32#include "llvm/Target/TargetData.h" 33#include "llvm/Transforms/Utils/PromoteMemToReg.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/GetElementPtrTypeIterator.h" 36#include "llvm/Support/MathExtras.h" 37#include "llvm/Support/Compiler.h" 38#include "llvm/ADT/SmallVector.h" 39#include "llvm/ADT/Statistic.h" 40#include "llvm/ADT/StringExtras.h" 41using namespace llvm; 42 43STATISTIC(NumReplaced, "Number of allocas broken up"); 44STATISTIC(NumPromoted, "Number of allocas promoted"); 45STATISTIC(NumConverted, "Number of aggregates converted to scalar"); 46STATISTIC(NumGlobals, "Number of allocas copied from constant global"); 47 48namespace { 49 struct VISIBILITY_HIDDEN SROA : public FunctionPass { 50 static char ID; // Pass identification, replacement for typeid 51 explicit SROA(signed T = -1) : FunctionPass(&ID) { 52 if (T == -1) 53 SRThreshold = 128; 54 else 55 SRThreshold = T; 56 } 57 58 bool runOnFunction(Function &F); 59 60 bool performScalarRepl(Function &F); 61 bool performPromotion(Function &F); 62 63 // getAnalysisUsage - This pass does not require any passes, but we know it 64 // will not alter the CFG, so say so. 65 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 66 AU.addRequired<DominatorTree>(); 67 AU.addRequired<DominanceFrontier>(); 68 AU.addRequired<TargetData>(); 69 AU.setPreservesCFG(); 70 } 71 72 private: 73 TargetData *TD; 74 75 /// AllocaInfo - When analyzing uses of an alloca instruction, this captures 76 /// information about the uses. All these fields are initialized to false 77 /// and set to true when something is learned. 78 struct AllocaInfo { 79 /// isUnsafe - This is set to true if the alloca cannot be SROA'd. 80 bool isUnsafe : 1; 81 82 /// needsCanon - This is set to true if there is some use of the alloca 83 /// that requires canonicalization. 84 bool needsCanon : 1; 85 86 /// isMemCpySrc - This is true if this aggregate is memcpy'd from. 87 bool isMemCpySrc : 1; 88 89 /// isMemCpyDst - This is true if this aggregate is memcpy'd into. 90 bool isMemCpyDst : 1; 91 92 AllocaInfo() 93 : isUnsafe(false), needsCanon(false), 94 isMemCpySrc(false), isMemCpyDst(false) {} 95 }; 96 97 unsigned SRThreshold; 98 99 void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; } 100 101 int isSafeAllocaToScalarRepl(AllocationInst *AI); 102 103 void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, 104 AllocaInfo &Info); 105 void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, 106 AllocaInfo &Info); 107 void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, 108 unsigned OpNo, AllocaInfo &Info); 109 void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI, 110 AllocaInfo &Info); 111 112 void DoScalarReplacement(AllocationInst *AI, 113 std::vector<AllocationInst*> &WorkList); 114 void CanonicalizeAllocaUsers(AllocationInst *AI); 115 AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base); 116 117 void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, 118 SmallVector<AllocaInst*, 32> &NewElts); 119 120 void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, 121 AllocationInst *AI, 122 SmallVector<AllocaInst*, 32> &NewElts); 123 void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI, 124 SmallVector<AllocaInst*, 32> &NewElts); 125 void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, 126 SmallVector<AllocaInst*, 32> &NewElts); 127 128 bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, 129 uint64_t Offset, unsigned AllocaSize); 130 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); 131 Value *ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, 132 uint64_t Offset); 133 Value *ConvertUsesOfStoreToScalar(Value *StoredVal, AllocaInst *NewAI, 134 uint64_t Offset, Instruction *InsertPt); 135 static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI); 136 }; 137} 138 139char SROA::ID = 0; 140static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); 141 142// Public interface to the ScalarReplAggregates pass 143FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { 144 return new SROA(Threshold); 145} 146 147 148bool SROA::runOnFunction(Function &F) { 149 TD = &getAnalysis<TargetData>(); 150 151 bool Changed = performPromotion(F); 152 while (1) { 153 bool LocalChange = performScalarRepl(F); 154 if (!LocalChange) break; // No need to repromote if no scalarrepl 155 Changed = true; 156 LocalChange = performPromotion(F); 157 if (!LocalChange) break; // No need to re-scalarrepl if no promotion 158 } 159 160 return Changed; 161} 162 163 164bool SROA::performPromotion(Function &F) { 165 std::vector<AllocaInst*> Allocas; 166 DominatorTree &DT = getAnalysis<DominatorTree>(); 167 DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); 168 169 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 170 171 bool Changed = false; 172 173 while (1) { 174 Allocas.clear(); 175 176 // Find allocas that are safe to promote, by looking at all instructions in 177 // the entry node 178 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 179 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 180 if (isAllocaPromotable(AI)) 181 Allocas.push_back(AI); 182 183 if (Allocas.empty()) break; 184 185 PromoteMemToReg(Allocas, DT, DF); 186 NumPromoted += Allocas.size(); 187 Changed = true; 188 } 189 190 return Changed; 191} 192 193/// getNumSAElements - Return the number of elements in the specific struct or 194/// array. 195static uint64_t getNumSAElements(const Type *T) { 196 if (const StructType *ST = dyn_cast<StructType>(T)) 197 return ST->getNumElements(); 198 return cast<ArrayType>(T)->getNumElements(); 199} 200 201// performScalarRepl - This algorithm is a simple worklist driven algorithm, 202// which runs on all of the malloc/alloca instructions in the function, removing 203// them if they are only used by getelementptr instructions. 204// 205bool SROA::performScalarRepl(Function &F) { 206 std::vector<AllocationInst*> WorkList; 207 208 // Scan the entry basic block, adding any alloca's and mallocs to the worklist 209 BasicBlock &BB = F.getEntryBlock(); 210 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 211 if (AllocationInst *A = dyn_cast<AllocationInst>(I)) 212 WorkList.push_back(A); 213 214 // Process the worklist 215 bool Changed = false; 216 while (!WorkList.empty()) { 217 AllocationInst *AI = WorkList.back(); 218 WorkList.pop_back(); 219 220 // Handle dead allocas trivially. These can be formed by SROA'ing arrays 221 // with unused elements. 222 if (AI->use_empty()) { 223 AI->eraseFromParent(); 224 continue; 225 } 226 227 // If this alloca is impossible for us to promote, reject it early. 228 if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) 229 continue; 230 231 // Check to see if this allocation is only modified by a memcpy/memmove from 232 // a constant global. If this is the case, we can change all users to use 233 // the constant global instead. This is commonly produced by the CFE by 234 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 235 // is only subsequently read. 236 if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { 237 DOUT << "Found alloca equal to global: " << *AI; 238 DOUT << " memcpy = " << *TheCopy; 239 Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); 240 AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); 241 TheCopy->eraseFromParent(); // Don't mutate the global. 242 AI->eraseFromParent(); 243 ++NumGlobals; 244 Changed = true; 245 continue; 246 } 247 248 // Check to see if we can perform the core SROA transformation. We cannot 249 // transform the allocation instruction if it is an array allocation 250 // (allocations OF arrays are ok though), and an allocation of a scalar 251 // value cannot be decomposed at all. 252 uint64_t AllocaSize = TD->getTypePaddedSize(AI->getAllocatedType()); 253 254 if ((isa<StructType>(AI->getAllocatedType()) || 255 isa<ArrayType>(AI->getAllocatedType())) && 256 // Do not promote any struct whose size is too big. 257 AllocaSize < SRThreshold && 258 // Do not promote any struct into more than "32" separate vars. 259 getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) { 260 // Check that all of the users of the allocation are capable of being 261 // transformed. 262 switch (isSafeAllocaToScalarRepl(AI)) { 263 default: assert(0 && "Unexpected value!"); 264 case 0: // Not safe to scalar replace. 265 break; 266 case 1: // Safe, but requires cleanup/canonicalizations first 267 CanonicalizeAllocaUsers(AI); 268 // FALL THROUGH. 269 case 3: // Safe to scalar replace. 270 DoScalarReplacement(AI, WorkList); 271 Changed = true; 272 continue; 273 } 274 } 275 276 // If we can turn this aggregate value (potentially with casts) into a 277 // simple scalar value that can be mem2reg'd into a register value. 278 // IsNotTrivial tracks whether this is something that mem2reg could have 279 // promoted itself. If so, we don't want to transform it needlessly. Note 280 // that we can't just check based on the type: the alloca may be of an i32 281 // but that has pointer arithmetic to set byte 3 of it or something. 282 bool IsNotTrivial = false; 283 const Type *VectorTy = 0; 284 if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, 285 0, unsigned(AllocaSize)) && IsNotTrivial) { 286 AllocaInst *NewAI; 287 if (VectorTy && isa<VectorType>(VectorTy)) { 288 DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n"; 289 290 // Create and insert the vector alloca. 291 NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); 292 ConvertUsesToScalar(AI, NewAI, 0); 293 } else { 294 DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; 295 296 // Create and insert the integer alloca. 297 const Type *NewTy = IntegerType::get(AllocaSize*8); 298 NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); 299 ConvertUsesToScalar(AI, NewAI, 0); 300 } 301 NewAI->takeName(AI); 302 AI->eraseFromParent(); 303 ++NumConverted; 304 Changed = true; 305 continue; 306 } 307 308 // Otherwise, couldn't process this alloca. 309 } 310 311 return Changed; 312} 313 314/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 315/// predicate, do SROA now. 316void SROA::DoScalarReplacement(AllocationInst *AI, 317 std::vector<AllocationInst*> &WorkList) { 318 DOUT << "Found inst to SROA: " << *AI; 319 SmallVector<AllocaInst*, 32> ElementAllocas; 320 if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 321 ElementAllocas.reserve(ST->getNumContainedTypes()); 322 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 323 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 324 AI->getAlignment(), 325 AI->getName() + "." + utostr(i), AI); 326 ElementAllocas.push_back(NA); 327 WorkList.push_back(NA); // Add to worklist for recursive processing 328 } 329 } else { 330 const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 331 ElementAllocas.reserve(AT->getNumElements()); 332 const Type *ElTy = AT->getElementType(); 333 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 334 AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), 335 AI->getName() + "." + utostr(i), AI); 336 ElementAllocas.push_back(NA); 337 WorkList.push_back(NA); // Add to worklist for recursive processing 338 } 339 } 340 341 // Now that we have created the alloca instructions that we want to use, 342 // expand the getelementptr instructions to use them. 343 // 344 while (!AI->use_empty()) { 345 Instruction *User = cast<Instruction>(AI->use_back()); 346 if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { 347 RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); 348 BCInst->eraseFromParent(); 349 continue; 350 } 351 352 // Replace: 353 // %res = load { i32, i32 }* %alloc 354 // with: 355 // %load.0 = load i32* %alloc.0 356 // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 357 // %load.1 = load i32* %alloc.1 358 // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 359 // (Also works for arrays instead of structs) 360 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 361 Value *Insert = UndefValue::get(LI->getType()); 362 for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { 363 Value *Load = new LoadInst(ElementAllocas[i], "load", LI); 364 Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); 365 } 366 LI->replaceAllUsesWith(Insert); 367 LI->eraseFromParent(); 368 continue; 369 } 370 371 // Replace: 372 // store { i32, i32 } %val, { i32, i32 }* %alloc 373 // with: 374 // %val.0 = extractvalue { i32, i32 } %val, 0 375 // store i32 %val.0, i32* %alloc.0 376 // %val.1 = extractvalue { i32, i32 } %val, 1 377 // store i32 %val.1, i32* %alloc.1 378 // (Also works for arrays instead of structs) 379 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 380 Value *Val = SI->getOperand(0); 381 for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { 382 Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); 383 new StoreInst(Extract, ElementAllocas[i], SI); 384 } 385 SI->eraseFromParent(); 386 continue; 387 } 388 389 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); 390 // We now know that the GEP is of the form: GEP <ptr>, 0, <cst> 391 unsigned Idx = 392 (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 393 394 assert(Idx < ElementAllocas.size() && "Index out of range?"); 395 AllocaInst *AllocaToUse = ElementAllocas[Idx]; 396 397 Value *RepValue; 398 if (GEPI->getNumOperands() == 3) { 399 // Do not insert a new getelementptr instruction with zero indices, only 400 // to have it optimized out later. 401 RepValue = AllocaToUse; 402 } else { 403 // We are indexing deeply into the structure, so we still need a 404 // getelement ptr instruction to finish the indexing. This may be 405 // expanded itself once the worklist is rerun. 406 // 407 SmallVector<Value*, 8> NewArgs; 408 NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); 409 NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); 410 RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), 411 NewArgs.end(), "", GEPI); 412 RepValue->takeName(GEPI); 413 } 414 415 // If this GEP is to the start of the aggregate, check for memcpys. 416 if (Idx == 0 && GEPI->hasAllZeroIndices()) 417 RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); 418 419 // Move all of the users over to the new GEP. 420 GEPI->replaceAllUsesWith(RepValue); 421 // Delete the old GEP 422 GEPI->eraseFromParent(); 423 } 424 425 // Finally, delete the Alloca instruction 426 AI->eraseFromParent(); 427 NumReplaced++; 428} 429 430 431/// isSafeElementUse - Check to see if this use is an allowed use for a 432/// getelementptr instruction of an array aggregate allocation. isFirstElt 433/// indicates whether Ptr is known to the start of the aggregate. 434/// 435void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, 436 AllocaInfo &Info) { 437 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 438 I != E; ++I) { 439 Instruction *User = cast<Instruction>(*I); 440 switch (User->getOpcode()) { 441 case Instruction::Load: break; 442 case Instruction::Store: 443 // Store is ok if storing INTO the pointer, not storing the pointer 444 if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); 445 break; 446 case Instruction::GetElementPtr: { 447 GetElementPtrInst *GEP = cast<GetElementPtrInst>(User); 448 bool AreAllZeroIndices = isFirstElt; 449 if (GEP->getNumOperands() > 1) { 450 if (!isa<ConstantInt>(GEP->getOperand(1)) || 451 !cast<ConstantInt>(GEP->getOperand(1))->isZero()) 452 // Using pointer arithmetic to navigate the array. 453 return MarkUnsafe(Info); 454 455 if (AreAllZeroIndices) 456 AreAllZeroIndices = GEP->hasAllZeroIndices(); 457 } 458 isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); 459 if (Info.isUnsafe) return; 460 break; 461 } 462 case Instruction::BitCast: 463 if (isFirstElt) { 464 isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info); 465 if (Info.isUnsafe) return; 466 break; 467 } 468 DOUT << " Transformation preventing inst: " << *User; 469 return MarkUnsafe(Info); 470 case Instruction::Call: 471 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 472 if (isFirstElt) { 473 isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); 474 if (Info.isUnsafe) return; 475 break; 476 } 477 } 478 DOUT << " Transformation preventing inst: " << *User; 479 return MarkUnsafe(Info); 480 default: 481 DOUT << " Transformation preventing inst: " << *User; 482 return MarkUnsafe(Info); 483 } 484 } 485 return; // All users look ok :) 486} 487 488/// AllUsersAreLoads - Return true if all users of this value are loads. 489static bool AllUsersAreLoads(Value *Ptr) { 490 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 491 I != E; ++I) 492 if (cast<Instruction>(*I)->getOpcode() != Instruction::Load) 493 return false; 494 return true; 495} 496 497/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an 498/// aggregate allocation. 499/// 500void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, 501 AllocaInfo &Info) { 502 if (BitCastInst *C = dyn_cast<BitCastInst>(User)) 503 return isSafeUseOfBitCastedAllocation(C, AI, Info); 504 505 if (LoadInst *LI = dyn_cast<LoadInst>(User)) 506 if (!LI->isVolatile()) 507 return;// Loads (returning a first class aggregrate) are always rewritable 508 509 if (StoreInst *SI = dyn_cast<StoreInst>(User)) 510 if (!SI->isVolatile() && SI->getOperand(0) != AI) 511 return;// Store is ok if storing INTO the pointer, not storing the pointer 512 513 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User); 514 if (GEPI == 0) 515 return MarkUnsafe(Info); 516 517 gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); 518 519 // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>". 520 if (I == E || 521 I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { 522 return MarkUnsafe(Info); 523 } 524 525 ++I; 526 if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? 527 528 bool IsAllZeroIndices = true; 529 530 // If the first index is a non-constant index into an array, see if we can 531 // handle it as a special case. 532 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 533 if (!isa<ConstantInt>(I.getOperand())) { 534 IsAllZeroIndices = 0; 535 uint64_t NumElements = AT->getNumElements(); 536 537 // If this is an array index and the index is not constant, we cannot 538 // promote... that is unless the array has exactly one or two elements in 539 // it, in which case we CAN promote it, but we have to canonicalize this 540 // out if this is the only problem. 541 if ((NumElements == 1 || NumElements == 2) && 542 AllUsersAreLoads(GEPI)) { 543 Info.needsCanon = true; 544 return; // Canonicalization required! 545 } 546 return MarkUnsafe(Info); 547 } 548 } 549 550 // Walk through the GEP type indices, checking the types that this indexes 551 // into. 552 for (; I != E; ++I) { 553 // Ignore struct elements, no extra checking needed for these. 554 if (isa<StructType>(*I)) 555 continue; 556 557 ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand()); 558 if (!IdxVal) return MarkUnsafe(Info); 559 560 // Are all indices still zero? 561 IsAllZeroIndices &= IdxVal->isZero(); 562 563 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 564 // This GEP indexes an array. Verify that this is an in-range constant 565 // integer. Specifically, consider A[0][i]. We cannot know that the user 566 // isn't doing invalid things like allowing i to index an out-of-range 567 // subscript that accesses A[1]. Because of this, we have to reject SROA 568 // of any accesses into structs where any of the components are variables. 569 if (IdxVal->getZExtValue() >= AT->getNumElements()) 570 return MarkUnsafe(Info); 571 } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) { 572 if (IdxVal->getZExtValue() >= VT->getNumElements()) 573 return MarkUnsafe(Info); 574 } 575 } 576 577 // If there are any non-simple uses of this getelementptr, make sure to reject 578 // them. 579 return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); 580} 581 582/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory 583/// intrinsic can be promoted by SROA. At this point, we know that the operand 584/// of the memintrinsic is a pointer to the beginning of the allocation. 585void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, 586 unsigned OpNo, AllocaInfo &Info) { 587 // If not constant length, give up. 588 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 589 if (!Length) return MarkUnsafe(Info); 590 591 // If not the whole aggregate, give up. 592 if (Length->getZExtValue() != 593 TD->getTypePaddedSize(AI->getType()->getElementType())) 594 return MarkUnsafe(Info); 595 596 // We only know about memcpy/memset/memmove. 597 if (!isa<MemCpyInst>(MI) && !isa<MemSetInst>(MI) && !isa<MemMoveInst>(MI)) 598 return MarkUnsafe(Info); 599 600 // Otherwise, we can transform it. Determine whether this is a memcpy/set 601 // into or out of the aggregate. 602 if (OpNo == 1) 603 Info.isMemCpyDst = true; 604 else { 605 assert(OpNo == 2); 606 Info.isMemCpySrc = true; 607 } 608} 609 610/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast 611/// are 612void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, 613 AllocaInfo &Info) { 614 for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); 615 UI != E; ++UI) { 616 if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) { 617 isSafeUseOfBitCastedAllocation(BCU, AI, Info); 618 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { 619 isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); 620 } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 621 if (SI->isVolatile()) 622 return MarkUnsafe(Info); 623 624 // If storing the entire alloca in one chunk through a bitcasted pointer 625 // to integer, we can transform it. This happens (for example) when you 626 // cast a {i32,i32}* to i64* and store through it. This is similar to the 627 // memcpy case and occurs in various "byval" cases and emulated memcpys. 628 if (isa<IntegerType>(SI->getOperand(0)->getType()) && 629 TD->getTypePaddedSize(SI->getOperand(0)->getType()) == 630 TD->getTypePaddedSize(AI->getType()->getElementType())) { 631 Info.isMemCpyDst = true; 632 continue; 633 } 634 return MarkUnsafe(Info); 635 } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) { 636 if (LI->isVolatile()) 637 return MarkUnsafe(Info); 638 639 // If loading the entire alloca in one chunk through a bitcasted pointer 640 // to integer, we can transform it. This happens (for example) when you 641 // cast a {i32,i32}* to i64* and load through it. This is similar to the 642 // memcpy case and occurs in various "byval" cases and emulated memcpys. 643 if (isa<IntegerType>(LI->getType()) && 644 TD->getTypePaddedSize(LI->getType()) == 645 TD->getTypePaddedSize(AI->getType()->getElementType())) { 646 Info.isMemCpySrc = true; 647 continue; 648 } 649 return MarkUnsafe(Info); 650 } else { 651 return MarkUnsafe(Info); 652 } 653 if (Info.isUnsafe) return; 654 } 655} 656 657/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes 658/// to its first element. Transform users of the cast to use the new values 659/// instead. 660void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, 661 SmallVector<AllocaInst*, 32> &NewElts) { 662 Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); 663 while (UI != UE) { 664 Instruction *User = cast<Instruction>(*UI++); 665 if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) { 666 RewriteBitCastUserOfAlloca(BCU, AI, NewElts); 667 if (BCU->use_empty()) BCU->eraseFromParent(); 668 continue; 669 } 670 671 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 672 // This must be memcpy/memmove/memset of the entire aggregate. 673 // Split into one per element. 674 RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); 675 continue; 676 } 677 678 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 679 // If this is a store of the entire alloca from an integer, rewrite it. 680 RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); 681 continue; 682 } 683 684 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 685 // If this is a load of the entire alloca to an integer, rewrite it. 686 RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); 687 continue; 688 } 689 690 // Otherwise it must be some other user of a gep of the first pointer. Just 691 // leave these alone. 692 continue; 693 } 694} 695 696/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. 697/// Rewrite it to copy or set the elements of the scalarized memory. 698void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, 699 AllocationInst *AI, 700 SmallVector<AllocaInst*, 32> &NewElts) { 701 702 // If this is a memcpy/memmove, construct the other pointer as the 703 // appropriate type. 704 Value *OtherPtr = 0; 705 if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(MI)) { 706 if (BCInst == MCI->getRawDest()) 707 OtherPtr = MCI->getRawSource(); 708 else { 709 assert(BCInst == MCI->getRawSource()); 710 OtherPtr = MCI->getRawDest(); 711 } 712 } else if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { 713 if (BCInst == MMI->getRawDest()) 714 OtherPtr = MMI->getRawSource(); 715 else { 716 assert(BCInst == MMI->getRawSource()); 717 OtherPtr = MMI->getRawDest(); 718 } 719 } 720 721 // If there is an other pointer, we want to convert it to the same pointer 722 // type as AI has, so we can GEP through it safely. 723 if (OtherPtr) { 724 // It is likely that OtherPtr is a bitcast, if so, remove it. 725 if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) 726 OtherPtr = BC->getOperand(0); 727 // All zero GEPs are effectively bitcasts. 728 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) 729 if (GEP->hasAllZeroIndices()) 730 OtherPtr = GEP->getOperand(0); 731 732 if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) 733 if (BCE->getOpcode() == Instruction::BitCast) 734 OtherPtr = BCE->getOperand(0); 735 736 // If the pointer is not the right type, insert a bitcast to the right 737 // type. 738 if (OtherPtr->getType() != AI->getType()) 739 OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), 740 MI); 741 } 742 743 // Process each element of the aggregate. 744 Value *TheFn = MI->getOperand(0); 745 const Type *BytePtrTy = MI->getRawDest()->getType(); 746 bool SROADest = MI->getRawDest() == BCInst; 747 748 Constant *Zero = Constant::getNullValue(Type::Int32Ty); 749 750 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 751 // If this is a memcpy/memmove, emit a GEP of the other element address. 752 Value *OtherElt = 0; 753 if (OtherPtr) { 754 Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) }; 755 OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, 756 OtherPtr->getNameStr()+"."+utostr(i), 757 MI); 758 } 759 760 Value *EltPtr = NewElts[i]; 761 const Type *EltTy =cast<PointerType>(EltPtr->getType())->getElementType(); 762 763 // If we got down to a scalar, insert a load or store as appropriate. 764 if (EltTy->isSingleValueType()) { 765 if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { 766 Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp", 767 MI); 768 new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI); 769 continue; 770 } 771 assert(isa<MemSetInst>(MI)); 772 773 // If the stored element is zero (common case), just store a null 774 // constant. 775 Constant *StoreVal; 776 if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { 777 if (CI->isZero()) { 778 StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> 779 } else { 780 // If EltTy is a vector type, get the element type. 781 const Type *ValTy = EltTy; 782 if (const VectorType *VTy = dyn_cast<VectorType>(ValTy)) 783 ValTy = VTy->getElementType(); 784 785 // Construct an integer with the right value. 786 unsigned EltSize = TD->getTypeSizeInBits(ValTy); 787 APInt OneVal(EltSize, CI->getZExtValue()); 788 APInt TotalVal(OneVal); 789 // Set each byte. 790 for (unsigned i = 0; 8*i < EltSize; ++i) { 791 TotalVal = TotalVal.shl(8); 792 TotalVal |= OneVal; 793 } 794 795 // Convert the integer value to the appropriate type. 796 StoreVal = ConstantInt::get(TotalVal); 797 if (isa<PointerType>(ValTy)) 798 StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); 799 else if (ValTy->isFloatingPoint()) 800 StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); 801 assert(StoreVal->getType() == ValTy && "Type mismatch!"); 802 803 // If the requested value was a vector constant, create it. 804 if (EltTy != ValTy) { 805 unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); 806 SmallVector<Constant*, 16> Elts(NumElts, StoreVal); 807 StoreVal = ConstantVector::get(&Elts[0], NumElts); 808 } 809 } 810 new StoreInst(StoreVal, EltPtr, MI); 811 continue; 812 } 813 // Otherwise, if we're storing a byte variable, use a memset call for 814 // this element. 815 } 816 817 // Cast the element pointer to BytePtrTy. 818 if (EltPtr->getType() != BytePtrTy) 819 EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); 820 821 // Cast the other pointer (if we have one) to BytePtrTy. 822 if (OtherElt && OtherElt->getType() != BytePtrTy) 823 OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), 824 MI); 825 826 unsigned EltSize = TD->getTypePaddedSize(EltTy); 827 828 // Finally, insert the meminst for this element. 829 if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { 830 Value *Ops[] = { 831 SROADest ? EltPtr : OtherElt, // Dest ptr 832 SROADest ? OtherElt : EltPtr, // Src ptr 833 ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 834 Zero // Align 835 }; 836 CallInst::Create(TheFn, Ops, Ops + 4, "", MI); 837 } else { 838 assert(isa<MemSetInst>(MI)); 839 Value *Ops[] = { 840 EltPtr, MI->getOperand(2), // Dest, Value, 841 ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 842 Zero // Align 843 }; 844 CallInst::Create(TheFn, Ops, Ops + 4, "", MI); 845 } 846 } 847 MI->eraseFromParent(); 848} 849 850/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that 851/// overwrites the entire allocation. Extract out the pieces of the stored 852/// integer and store them individually. 853void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, 854 AllocationInst *AI, 855 SmallVector<AllocaInst*, 32> &NewElts){ 856 // Extract each element out of the integer according to its structure offset 857 // and store the element value to the individual alloca. 858 Value *SrcVal = SI->getOperand(0); 859 const Type *AllocaEltTy = AI->getType()->getElementType(); 860 uint64_t AllocaSizeBits = TD->getTypePaddedSizeInBits(AllocaEltTy); 861 862 // If this isn't a store of an integer to the whole alloca, it may be a store 863 // to the first element. Just ignore the store in this case and normal SROA 864 // will handle it. 865 if (!isa<IntegerType>(SrcVal->getType()) || 866 TD->getTypePaddedSizeInBits(SrcVal->getType()) != AllocaSizeBits) 867 return; 868 869 DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; 870 871 // There are two forms here: AI could be an array or struct. Both cases 872 // have different ways to compute the element offset. 873 if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 874 const StructLayout *Layout = TD->getStructLayout(EltSTy); 875 876 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 877 // Get the number of bits to shift SrcVal to get the value. 878 const Type *FieldTy = EltSTy->getElementType(i); 879 uint64_t Shift = Layout->getElementOffsetInBits(i); 880 881 if (TD->isBigEndian()) 882 Shift = AllocaSizeBits-Shift-TD->getTypePaddedSizeInBits(FieldTy); 883 884 Value *EltVal = SrcVal; 885 if (Shift) { 886 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 887 EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, 888 "sroa.store.elt", SI); 889 } 890 891 // Truncate down to an integer of the right size. 892 uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 893 894 // Ignore zero sized fields like {}, they obviously contain no data. 895 if (FieldSizeBits == 0) continue; 896 897 if (FieldSizeBits != AllocaSizeBits) 898 EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); 899 Value *DestField = NewElts[i]; 900 if (EltVal->getType() == FieldTy) { 901 // Storing to an integer field of this size, just do it. 902 } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { 903 // Bitcast to the right element type (for fp/vector values). 904 EltVal = new BitCastInst(EltVal, FieldTy, "", SI); 905 } else { 906 // Otherwise, bitcast the dest pointer (for aggregates). 907 DestField = new BitCastInst(DestField, 908 PointerType::getUnqual(EltVal->getType()), 909 "", SI); 910 } 911 new StoreInst(EltVal, DestField, SI); 912 } 913 914 } else { 915 const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); 916 const Type *ArrayEltTy = ATy->getElementType(); 917 uint64_t ElementOffset = TD->getTypePaddedSizeInBits(ArrayEltTy); 918 uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); 919 920 uint64_t Shift; 921 922 if (TD->isBigEndian()) 923 Shift = AllocaSizeBits-ElementOffset; 924 else 925 Shift = 0; 926 927 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 928 // Ignore zero sized fields like {}, they obviously contain no data. 929 if (ElementSizeBits == 0) continue; 930 931 Value *EltVal = SrcVal; 932 if (Shift) { 933 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 934 EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, 935 "sroa.store.elt", SI); 936 } 937 938 // Truncate down to an integer of the right size. 939 if (ElementSizeBits != AllocaSizeBits) 940 EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); 941 Value *DestField = NewElts[i]; 942 if (EltVal->getType() == ArrayEltTy) { 943 // Storing to an integer field of this size, just do it. 944 } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { 945 // Bitcast to the right element type (for fp/vector values). 946 EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); 947 } else { 948 // Otherwise, bitcast the dest pointer (for aggregates). 949 DestField = new BitCastInst(DestField, 950 PointerType::getUnqual(EltVal->getType()), 951 "", SI); 952 } 953 new StoreInst(EltVal, DestField, SI); 954 955 if (TD->isBigEndian()) 956 Shift -= ElementOffset; 957 else 958 Shift += ElementOffset; 959 } 960 } 961 962 SI->eraseFromParent(); 963} 964 965/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to 966/// an integer. Load the individual pieces to form the aggregate value. 967void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, 968 SmallVector<AllocaInst*, 32> &NewElts) { 969 // Extract each element out of the NewElts according to its structure offset 970 // and form the result value. 971 const Type *AllocaEltTy = AI->getType()->getElementType(); 972 uint64_t AllocaSizeBits = TD->getTypePaddedSizeInBits(AllocaEltTy); 973 974 // If this isn't a load of the whole alloca to an integer, it may be a load 975 // of the first element. Just ignore the load in this case and normal SROA 976 // will handle it. 977 if (!isa<IntegerType>(LI->getType()) || 978 TD->getTypePaddedSizeInBits(LI->getType()) != AllocaSizeBits) 979 return; 980 981 DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; 982 983 // There are two forms here: AI could be an array or struct. Both cases 984 // have different ways to compute the element offset. 985 const StructLayout *Layout = 0; 986 uint64_t ArrayEltBitOffset = 0; 987 if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 988 Layout = TD->getStructLayout(EltSTy); 989 } else { 990 const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); 991 ArrayEltBitOffset = TD->getTypePaddedSizeInBits(ArrayEltTy); 992 } 993 994 Value *ResultVal = Constant::getNullValue(LI->getType()); 995 996 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 997 // Load the value from the alloca. If the NewElt is an aggregate, cast 998 // the pointer to an integer of the same size before doing the load. 999 Value *SrcField = NewElts[i]; 1000 const Type *FieldTy = 1001 cast<PointerType>(SrcField->getType())->getElementType(); 1002 uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 1003 1004 // Ignore zero sized fields like {}, they obviously contain no data. 1005 if (FieldSizeBits == 0) continue; 1006 1007 const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits); 1008 if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && 1009 !isa<VectorType>(FieldTy)) 1010 SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), 1011 "", LI); 1012 SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); 1013 1014 // If SrcField is a fp or vector of the right size but that isn't an 1015 // integer type, bitcast to an integer so we can shift it. 1016 if (SrcField->getType() != FieldIntTy) 1017 SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); 1018 1019 // Zero extend the field to be the same size as the final alloca so that 1020 // we can shift and insert it. 1021 if (SrcField->getType() != ResultVal->getType()) 1022 SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); 1023 1024 // Determine the number of bits to shift SrcField. 1025 uint64_t Shift; 1026 if (Layout) // Struct case. 1027 Shift = Layout->getElementOffsetInBits(i); 1028 else // Array case. 1029 Shift = i*ArrayEltBitOffset; 1030 1031 if (TD->isBigEndian()) 1032 Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); 1033 1034 if (Shift) { 1035 Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); 1036 SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); 1037 } 1038 1039 ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); 1040 } 1041 1042 LI->replaceAllUsesWith(ResultVal); 1043 LI->eraseFromParent(); 1044} 1045 1046 1047/// HasPadding - Return true if the specified type has any structure or 1048/// alignment padding, false otherwise. 1049static bool HasPadding(const Type *Ty, const TargetData &TD) { 1050 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 1051 const StructLayout *SL = TD.getStructLayout(STy); 1052 unsigned PrevFieldBitOffset = 0; 1053 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 1054 unsigned FieldBitOffset = SL->getElementOffsetInBits(i); 1055 1056 // Padding in sub-elements? 1057 if (HasPadding(STy->getElementType(i), TD)) 1058 return true; 1059 1060 // Check to see if there is any padding between this element and the 1061 // previous one. 1062 if (i) { 1063 unsigned PrevFieldEnd = 1064 PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); 1065 if (PrevFieldEnd < FieldBitOffset) 1066 return true; 1067 } 1068 1069 PrevFieldBitOffset = FieldBitOffset; 1070 } 1071 1072 // Check for tail padding. 1073 if (unsigned EltCount = STy->getNumElements()) { 1074 unsigned PrevFieldEnd = PrevFieldBitOffset + 1075 TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); 1076 if (PrevFieldEnd < SL->getSizeInBits()) 1077 return true; 1078 } 1079 1080 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 1081 return HasPadding(ATy->getElementType(), TD); 1082 } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { 1083 return HasPadding(VTy->getElementType(), TD); 1084 } 1085 return TD.getTypeSizeInBits(Ty) != TD.getTypePaddedSizeInBits(Ty); 1086} 1087 1088/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of 1089/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, 1090/// or 1 if safe after canonicalization has been performed. 1091/// 1092int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { 1093 // Loop over the use list of the alloca. We can only transform it if all of 1094 // the users are safe to transform. 1095 AllocaInfo Info; 1096 1097 for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); 1098 I != E; ++I) { 1099 isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info); 1100 if (Info.isUnsafe) { 1101 DOUT << "Cannot transform: " << *AI << " due to user: " << **I; 1102 return 0; 1103 } 1104 } 1105 1106 // Okay, we know all the users are promotable. If the aggregate is a memcpy 1107 // source and destination, we have to be careful. In particular, the memcpy 1108 // could be moving around elements that live in structure padding of the LLVM 1109 // types, but may actually be used. In these cases, we refuse to promote the 1110 // struct. 1111 if (Info.isMemCpySrc && Info.isMemCpyDst && 1112 HasPadding(AI->getType()->getElementType(), *TD)) 1113 return 0; 1114 1115 // If we require cleanup, return 1, otherwise return 3. 1116 return Info.needsCanon ? 1 : 3; 1117} 1118 1119/// CanonicalizeAllocaUsers - If SROA reported that it can promote the specified 1120/// allocation, but only if cleaned up, perform the cleanups required. 1121void SROA::CanonicalizeAllocaUsers(AllocationInst *AI) { 1122 // At this point, we know that the end result will be SROA'd and promoted, so 1123 // we can insert ugly code if required so long as sroa+mem2reg will clean it 1124 // up. 1125 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 1126 UI != E; ) { 1127 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI++); 1128 if (!GEPI) continue; 1129 gep_type_iterator I = gep_type_begin(GEPI); 1130 ++I; 1131 1132 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 1133 uint64_t NumElements = AT->getNumElements(); 1134 1135 if (!isa<ConstantInt>(I.getOperand())) { 1136 if (NumElements == 1) { 1137 GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty)); 1138 } else { 1139 assert(NumElements == 2 && "Unhandled case!"); 1140 // All users of the GEP must be loads. At each use of the GEP, insert 1141 // two loads of the appropriate indexed GEP and select between them. 1142 Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), 1143 Constant::getNullValue(I.getOperand()->getType()), 1144 "isone", GEPI); 1145 // Insert the new GEP instructions, which are properly indexed. 1146 SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); 1147 Indices[1] = Constant::getNullValue(Type::Int32Ty); 1148 Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), 1149 Indices.begin(), 1150 Indices.end(), 1151 GEPI->getName()+".0", GEPI); 1152 Indices[1] = ConstantInt::get(Type::Int32Ty, 1); 1153 Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), 1154 Indices.begin(), 1155 Indices.end(), 1156 GEPI->getName()+".1", GEPI); 1157 // Replace all loads of the variable index GEP with loads from both 1158 // indexes and a select. 1159 while (!GEPI->use_empty()) { 1160 LoadInst *LI = cast<LoadInst>(GEPI->use_back()); 1161 Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); 1162 Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); 1163 Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI); 1164 LI->replaceAllUsesWith(R); 1165 LI->eraseFromParent(); 1166 } 1167 GEPI->eraseFromParent(); 1168 } 1169 } 1170 } 1171 } 1172} 1173 1174/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at 1175/// the offset specified by Offset (which is specified in bytes). 1176/// 1177/// There are two cases we handle here: 1178/// 1) A union of vector types of the same size and potentially its elements. 1179/// Here we turn element accesses into insert/extract element operations. 1180/// This promotes a <4 x float> with a store of float to the third element 1181/// into a <4 x float> that uses insert element. 1182/// 2) A fully general blob of memory, which we turn into some (potentially 1183/// large) integer type with extract and insert operations where the loads 1184/// and stores would mutate the memory. 1185static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, 1186 unsigned AllocaSize, const TargetData &TD) { 1187 // If this could be contributing to a vector, analyze it. 1188 if (VecTy != Type::VoidTy) { // either null or a vector type. 1189 1190 // If the In type is a vector that is the same size as the alloca, see if it 1191 // matches the existing VecTy. 1192 if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { 1193 if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { 1194 // If we're storing/loading a vector of the right size, allow it as a 1195 // vector. If this the first vector we see, remember the type so that 1196 // we know the element size. 1197 if (VecTy == 0) 1198 VecTy = VInTy; 1199 return; 1200 } 1201 } else if (In == Type::FloatTy || In == Type::DoubleTy || 1202 (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 && 1203 isPowerOf2_32(In->getPrimitiveSizeInBits()))) { 1204 // If we're accessing something that could be an element of a vector, see 1205 // if the implied vector agrees with what we already have and if Offset is 1206 // compatible with it. 1207 unsigned EltSize = In->getPrimitiveSizeInBits()/8; 1208 if (Offset % EltSize == 0 && 1209 AllocaSize % EltSize == 0 && 1210 (VecTy == 0 || 1211 cast<VectorType>(VecTy)->getElementType() 1212 ->getPrimitiveSizeInBits()/8 == EltSize)) { 1213 if (VecTy == 0) 1214 VecTy = VectorType::get(In, AllocaSize/EltSize); 1215 return; 1216 } 1217 } 1218 } 1219 1220 // Otherwise, we have a case that we can't handle with an optimized vector 1221 // form. We can still turn this into a large integer. 1222 VecTy = Type::VoidTy; 1223} 1224 1225/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all 1226/// its accesses to use a to single vector type, return true, and set VecTy to 1227/// the new type. If we could convert the alloca into a single promotable 1228/// integer, return true but set VecTy to VoidTy. Further, if the use is not a 1229/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset 1230/// is the current offset from the base of the alloca being analyzed. 1231/// 1232bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, 1233 const Type *&VecTy, uint64_t Offset, 1234 unsigned AllocaSize) { 1235 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 1236 Instruction *User = cast<Instruction>(*UI); 1237 1238 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1239 // Don't break volatile loads. 1240 if (LI->isVolatile()) 1241 return false; 1242 MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD); 1243 continue; 1244 } 1245 1246 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1247 // Storing the pointer, not into the value? 1248 if (SI->getOperand(0) == V || SI->isVolatile()) return 0; 1249 MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD); 1250 continue; 1251 } 1252 1253 if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { 1254 if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, Offset, AllocaSize)) 1255 return false; 1256 IsNotTrivial = true; 1257 continue; 1258 } 1259 1260 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { 1261 // If this is a GEP with a variable indices, we can't handle it. 1262 if (!GEP->hasAllConstantIndices()) 1263 return false; 1264 1265 // Compute the offset that this GEP adds to the pointer. 1266 SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); 1267 uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), 1268 &Indices[0], Indices.size()); 1269 // See if all uses can be converted. 1270 if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, Offset+GEPOffset, 1271 AllocaSize)) 1272 return false; 1273 IsNotTrivial = true; 1274 continue; 1275 } 1276 1277 // If this is a constant sized memset of a constant value (e.g. 0) we can 1278 // handle it. 1279 if (isa<MemSetInst>(User) && 1280 // Store of constant value. 1281 isa<ConstantInt>(User->getOperand(2)) && 1282 // Store with constant size. 1283 isa<ConstantInt>(User->getOperand(3))) { 1284 VecTy = Type::VoidTy; 1285 IsNotTrivial = true; 1286 continue; 1287 } 1288 1289 // Otherwise, we cannot handle this! 1290 return false; 1291 } 1292 1293 return true; 1294} 1295 1296 1297/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca 1298/// directly. This happens when we are converting an "integer union" to a 1299/// single integer scalar, or when we are converting a "vector union" to a 1300/// vector with insert/extractelement instructions. 1301/// 1302/// Offset is an offset from the original alloca, in bits that need to be 1303/// shifted to the right. By the end of this, there should be no uses of Ptr. 1304void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { 1305 while (!Ptr->use_empty()) { 1306 Instruction *User = cast<Instruction>(Ptr->use_back()); 1307 1308 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1309 LI->replaceAllUsesWith(ConvertUsesOfLoadToScalar(LI, NewAI, Offset)); 1310 LI->eraseFromParent(); 1311 continue; 1312 } 1313 1314 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1315 assert(SI->getOperand(0) != Ptr && "Consistency error!"); 1316 new StoreInst(ConvertUsesOfStoreToScalar(SI->getOperand(0), NewAI, 1317 Offset, SI), NewAI, SI); 1318 SI->eraseFromParent(); 1319 continue; 1320 } 1321 1322 if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { 1323 ConvertUsesToScalar(CI, NewAI, Offset); 1324 CI->eraseFromParent(); 1325 continue; 1326 } 1327 1328 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { 1329 // Compute the offset that this GEP adds to the pointer. 1330 SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); 1331 uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), 1332 &Indices[0], Indices.size()); 1333 ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); 1334 GEP->eraseFromParent(); 1335 continue; 1336 } 1337 1338 // If this is a constant sized memset of a constant value (e.g. 0) we can 1339 // transform it into a store of the expanded constant value. 1340 if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { 1341 assert(MSI->getRawDest() == Ptr && "Consistency error!"); 1342 unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 1343 unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); 1344 1345 // Compute the value replicated the right number of times. 1346 APInt APVal(NumBytes*8, Val); 1347 1348 // Splat the value if non-zero. 1349 if (Val) 1350 for (unsigned i = 1; i != NumBytes; ++i) 1351 APVal |= APVal << 8; 1352 1353 new StoreInst(ConvertUsesOfStoreToScalar(ConstantInt::get(APVal), NewAI, 1354 Offset, MSI), NewAI, MSI); 1355 MSI->eraseFromParent(); 1356 continue; 1357 } 1358 1359 1360 assert(0 && "Unsupported operation!"); 1361 abort(); 1362 } 1363} 1364 1365/// ConvertUsesOfLoadToScalar - Convert all of the users of the specified load 1366/// to use the new alloca directly, returning the value that should replace the 1367/// load. This happens when we are converting an "integer union" to a single 1368/// integer scalar, or when we are converting a "vector union" to a vector with 1369/// insert/extractelement instructions. 1370/// 1371/// Offset is an offset from the original alloca, in bits that need to be 1372/// shifted to the right. By the end of this, there should be no uses of Ptr. 1373Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, 1374 uint64_t Offset) { 1375 // The load is a bit extract from NewAI shifted right by Offset bits. 1376 Value *NV = new LoadInst(NewAI, LI->getName(), LI); 1377 1378 // If the load is of the whole new alloca, no conversion is needed. 1379 if (NV->getType() == LI->getType() && Offset == 0) 1380 return NV; 1381 1382 // If the result alloca is a vector type, this is either an element 1383 // access or a bitcast to another vector type of the same size. 1384 if (const VectorType *VTy = dyn_cast<VectorType>(NV->getType())) { 1385 if (isa<VectorType>(LI->getType())) 1386 return new BitCastInst(NV, LI->getType(), LI->getName(), LI); 1387 1388 // Otherwise it must be an element access. 1389 unsigned Elt = 0; 1390 if (Offset) { 1391 unsigned EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType()); 1392 Elt = Offset/EltSize; 1393 assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); 1394 } 1395 // Return the element extracted out of it. 1396 Value *V = new ExtractElementInst(NV, ConstantInt::get(Type::Int32Ty, Elt), 1397 "tmp", LI); 1398 if (V->getType() != LI->getType()) 1399 V = new BitCastInst(V, LI->getType(), "tmp", LI); 1400 return V; 1401 } 1402 1403 // Otherwise, this must be a union that was converted to an integer value. 1404 const IntegerType *NTy = cast<IntegerType>(NV->getType()); 1405 1406 // If this is a big-endian system and the load is narrower than the 1407 // full alloca type, we need to do a shift to get the right bits. 1408 int ShAmt = 0; 1409 if (TD->isBigEndian()) { 1410 // On big-endian machines, the lowest bit is stored at the bit offset 1411 // from the pointer given by getTypeStoreSizeInBits. This matters for 1412 // integers with a bitwidth that is not a multiple of 8. 1413 ShAmt = TD->getTypeStoreSizeInBits(NTy) - 1414 TD->getTypeStoreSizeInBits(LI->getType()) - Offset; 1415 } else { 1416 ShAmt = Offset; 1417 } 1418 1419 // Note: we support negative bitwidths (with shl) which are not defined. 1420 // We do this to support (f.e.) loads off the end of a structure where 1421 // only some bits are used. 1422 if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) 1423 NV = BinaryOperator::CreateLShr(NV, 1424 ConstantInt::get(NV->getType(), ShAmt), 1425 LI->getName(), LI); 1426 else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) 1427 NV = BinaryOperator::CreateShl(NV, 1428 ConstantInt::get(NV->getType(), -ShAmt), 1429 LI->getName(), LI); 1430 1431 // Finally, unconditionally truncate the integer to the right width. 1432 unsigned LIBitWidth = TD->getTypeSizeInBits(LI->getType()); 1433 if (LIBitWidth < NTy->getBitWidth()) 1434 NV = new TruncInst(NV, IntegerType::get(LIBitWidth), 1435 LI->getName(), LI); 1436 1437 // If the result is an integer, this is a trunc or bitcast. 1438 if (isa<IntegerType>(LI->getType())) { 1439 // Should be done. 1440 } else if (LI->getType()->isFloatingPoint() || 1441 isa<VectorType>(LI->getType())) { 1442 // Just do a bitcast, we know the sizes match up. 1443 NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI); 1444 } else { 1445 // Otherwise must be a pointer. 1446 NV = new IntToPtrInst(NV, LI->getType(), LI->getName(), LI); 1447 } 1448 assert(NV->getType() == LI->getType() && "Didn't convert right?"); 1449 return NV; 1450} 1451 1452 1453/// ConvertUsesOfStoreToScalar - Convert the specified store to a load+store 1454/// pair of the new alloca directly, returning the value that should be stored 1455/// to the alloca. This happens when we are converting an "integer union" to a 1456/// single integer scalar, or when we are converting a "vector union" to a 1457/// vector with insert/extractelement instructions. 1458/// 1459/// Offset is an offset from the original alloca, in bits that need to be 1460/// shifted to the right. By the end of this, there should be no uses of Ptr. 1461Value *SROA::ConvertUsesOfStoreToScalar(Value *SV, AllocaInst *NewAI, 1462 uint64_t Offset, Instruction *IP) { 1463 1464 // Convert the stored type to the actual type, shift it left to insert 1465 // then 'or' into place. 1466 const Type *AllocaType = NewAI->getType()->getElementType(); 1467 if (SV->getType() == AllocaType && Offset == 0) 1468 return SV; 1469 1470 if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { 1471 Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", IP); 1472 1473 // If the result alloca is a vector type, this is either an element 1474 // access or a bitcast to another vector type. 1475 if (isa<VectorType>(SV->getType())) { 1476 SV = new BitCastInst(SV, AllocaType, SV->getName(), IP); 1477 } else { 1478 // Must be an element insertion. 1479 unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType()); 1480 1481 if (SV->getType() != VTy->getElementType()) 1482 SV = new BitCastInst(SV, VTy->getElementType(), "tmp", IP); 1483 1484 SV = InsertElementInst::Create(Old, SV, 1485 ConstantInt::get(Type::Int32Ty, Elt), 1486 "tmp", IP); 1487 } 1488 return SV; 1489 } 1490 1491 1492 Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", IP); 1493 1494 // If SV is a float, convert it to the appropriate integer type. 1495 // If it is a pointer, do the same, and also handle ptr->ptr casts 1496 // here. 1497 unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType()); 1498 unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); 1499 unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); 1500 unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); 1501 if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) 1502 SV = new BitCastInst(SV, IntegerType::get(SrcWidth), SV->getName(), IP); 1503 else if (isa<PointerType>(SV->getType())) 1504 SV = new PtrToIntInst(SV, TD->getIntPtrType(), SV->getName(), IP); 1505 1506 // Zero extend or truncate the value if needed. 1507 if (SV->getType() != AllocaType) { 1508 if (SV->getType()->getPrimitiveSizeInBits() < 1509 AllocaType->getPrimitiveSizeInBits()) 1510 SV = new ZExtInst(SV, AllocaType, SV->getName(), IP); 1511 else { 1512 // Truncation may be needed if storing more than the alloca can hold 1513 // (undefined behavior). 1514 SV = new TruncInst(SV, AllocaType, SV->getName(), IP); 1515 SrcWidth = DestWidth; 1516 SrcStoreWidth = DestStoreWidth; 1517 } 1518 } 1519 1520 // If this is a big-endian system and the store is narrower than the 1521 // full alloca type, we need to do a shift to get the right bits. 1522 int ShAmt = 0; 1523 if (TD->isBigEndian()) { 1524 // On big-endian machines, the lowest bit is stored at the bit offset 1525 // from the pointer given by getTypeStoreSizeInBits. This matters for 1526 // integers with a bitwidth that is not a multiple of 8. 1527 ShAmt = DestStoreWidth - SrcStoreWidth - Offset; 1528 } else { 1529 ShAmt = Offset; 1530 } 1531 1532 // Note: we support negative bitwidths (with shr) which are not defined. 1533 // We do this to support (f.e.) stores off the end of a structure where 1534 // only some bits in the structure are set. 1535 APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); 1536 if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { 1537 SV = BinaryOperator::CreateShl(SV, 1538 ConstantInt::get(SV->getType(), ShAmt), 1539 SV->getName(), IP); 1540 Mask <<= ShAmt; 1541 } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { 1542 SV = BinaryOperator::CreateLShr(SV, 1543 ConstantInt::get(SV->getType(), -ShAmt), 1544 SV->getName(), IP); 1545 Mask = Mask.lshr(-ShAmt); 1546 } 1547 1548 // Mask out the bits we are about to insert from the old value, and or 1549 // in the new bits. 1550 if (SrcWidth != DestWidth) { 1551 assert(DestWidth > SrcWidth); 1552 Old = BinaryOperator::CreateAnd(Old, ConstantInt::get(~Mask), 1553 Old->getName()+".mask", IP); 1554 SV = BinaryOperator::CreateOr(Old, SV, SV->getName()+".ins", IP); 1555 } 1556 return SV; 1557} 1558 1559 1560 1561/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to 1562/// some part of a constant global variable. This intentionally only accepts 1563/// constant expressions because we don't can't rewrite arbitrary instructions. 1564static bool PointsToConstantGlobal(Value *V) { 1565 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 1566 return GV->isConstant(); 1567 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) 1568 if (CE->getOpcode() == Instruction::BitCast || 1569 CE->getOpcode() == Instruction::GetElementPtr) 1570 return PointsToConstantGlobal(CE->getOperand(0)); 1571 return false; 1572} 1573 1574/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) 1575/// pointer to an alloca. Ignore any reads of the pointer, return false if we 1576/// see any stores or other unknown uses. If we see pointer arithmetic, keep 1577/// track of whether it moves the pointer (with isOffset) but otherwise traverse 1578/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to 1579/// the alloca, and if the source pointer is a pointer to a constant global, we 1580/// can optimize this. 1581static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, 1582 bool isOffset) { 1583 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 1584 if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) 1585 // Ignore non-volatile loads, they are always ok. 1586 if (!LI->isVolatile()) 1587 continue; 1588 1589 if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { 1590 // If uses of the bitcast are ok, we are ok. 1591 if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) 1592 return false; 1593 continue; 1594 } 1595 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { 1596 // If the GEP has all zero indices, it doesn't offset the pointer. If it 1597 // doesn't, it does. 1598 if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, 1599 isOffset || !GEP->hasAllZeroIndices())) 1600 return false; 1601 continue; 1602 } 1603 1604 // If this is isn't our memcpy/memmove, reject it as something we can't 1605 // handle. 1606 if (!isa<MemCpyInst>(*UI) && !isa<MemMoveInst>(*UI)) 1607 return false; 1608 1609 // If we already have seen a copy, reject the second one. 1610 if (TheCopy) return false; 1611 1612 // If the pointer has been offset from the start of the alloca, we can't 1613 // safely handle this. 1614 if (isOffset) return false; 1615 1616 // If the memintrinsic isn't using the alloca as the dest, reject it. 1617 if (UI.getOperandNo() != 1) return false; 1618 1619 MemIntrinsic *MI = cast<MemIntrinsic>(*UI); 1620 1621 // If the source of the memcpy/move is not a constant global, reject it. 1622 if (!PointsToConstantGlobal(MI->getOperand(2))) 1623 return false; 1624 1625 // Otherwise, the transform is safe. Remember the copy instruction. 1626 TheCopy = MI; 1627 } 1628 return true; 1629} 1630 1631/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only 1632/// modified by a copy from a constant global. If we can prove this, we can 1633/// replace any uses of the alloca with uses of the global directly. 1634Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) { 1635 Instruction *TheCopy = 0; 1636 if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) 1637 return TheCopy; 1638 return 0; 1639} 1640