ScalarReplAggregates.cpp revision 794fd75c67a2cdc128d67342c6d88a504d186896
1//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This transformation implements the well known scalar replacement of 11// aggregates transformation. This xform breaks up alloca instructions of 12// aggregate type (structure or array) into individual alloca instructions for 13// each member (if possible). Then, if possible, it transforms the individual 14// alloca instructions into nice clean scalar SSA form. 15// 16// This combines a simple SRoA algorithm with the Mem2Reg algorithm because 17// often interact, especially for C++ programs. As such, iterating between 18// SRoA, then Mem2Reg until we run out of things to promote works well. 19// 20//===----------------------------------------------------------------------===// 21 22#define DEBUG_TYPE "scalarrepl" 23#include "llvm/Transforms/Scalar.h" 24#include "llvm/Constants.h" 25#include "llvm/DerivedTypes.h" 26#include "llvm/Function.h" 27#include "llvm/GlobalVariable.h" 28#include "llvm/Instructions.h" 29#include "llvm/IntrinsicInst.h" 30#include "llvm/Pass.h" 31#include "llvm/Analysis/Dominators.h" 32#include "llvm/Target/TargetData.h" 33#include "llvm/Transforms/Utils/PromoteMemToReg.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/GetElementPtrTypeIterator.h" 36#include "llvm/Support/MathExtras.h" 37#include "llvm/Support/Compiler.h" 38#include "llvm/ADT/SmallVector.h" 39#include "llvm/ADT/Statistic.h" 40#include "llvm/ADT/StringExtras.h" 41using namespace llvm; 42 43STATISTIC(NumReplaced, "Number of allocas broken up"); 44STATISTIC(NumPromoted, "Number of allocas promoted"); 45STATISTIC(NumConverted, "Number of aggregates converted to scalar"); 46STATISTIC(NumGlobals, "Number of allocas copied from constant global"); 47 48namespace { 49 struct VISIBILITY_HIDDEN SROA : public FunctionPass { 50 static const int ID; // Pass identifcation, replacement for typeid 51 SROA() : FunctionPass((intptr_t)&ID) {} 52 53 bool runOnFunction(Function &F); 54 55 bool performScalarRepl(Function &F); 56 bool performPromotion(Function &F); 57 58 // getAnalysisUsage - This pass does not require any passes, but we know it 59 // will not alter the CFG, so say so. 60 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 61 AU.addRequired<ETForest>(); 62 AU.addRequired<DominanceFrontier>(); 63 AU.addRequired<TargetData>(); 64 AU.setPreservesCFG(); 65 } 66 67 private: 68 int isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI); 69 int isSafeUseOfAllocation(Instruction *User, AllocationInst *AI); 70 bool isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI); 71 bool isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI); 72 int isSafeAllocaToScalarRepl(AllocationInst *AI); 73 void DoScalarReplacement(AllocationInst *AI, 74 std::vector<AllocationInst*> &WorkList); 75 void CanonicalizeAllocaUsers(AllocationInst *AI); 76 AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base); 77 78 void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, 79 SmallVector<AllocaInst*, 32> &NewElts); 80 81 const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial); 82 void ConvertToScalar(AllocationInst *AI, const Type *Ty); 83 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset); 84 static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI); 85 }; 86 87 const int SROA::ID = 0; 88 RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); 89} 90 91// Public interface to the ScalarReplAggregates pass 92FunctionPass *llvm::createScalarReplAggregatesPass() { return new SROA(); } 93 94 95bool SROA::runOnFunction(Function &F) { 96 bool Changed = performPromotion(F); 97 while (1) { 98 bool LocalChange = performScalarRepl(F); 99 if (!LocalChange) break; // No need to repromote if no scalarrepl 100 Changed = true; 101 LocalChange = performPromotion(F); 102 if (!LocalChange) break; // No need to re-scalarrepl if no promotion 103 } 104 105 return Changed; 106} 107 108 109bool SROA::performPromotion(Function &F) { 110 std::vector<AllocaInst*> Allocas; 111 ETForest &ET = getAnalysis<ETForest>(); 112 DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); 113 114 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 115 116 bool Changed = false; 117 118 while (1) { 119 Allocas.clear(); 120 121 // Find allocas that are safe to promote, by looking at all instructions in 122 // the entry node 123 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 124 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 125 if (isAllocaPromotable(AI)) 126 Allocas.push_back(AI); 127 128 if (Allocas.empty()) break; 129 130 PromoteMemToReg(Allocas, ET, DF); 131 NumPromoted += Allocas.size(); 132 Changed = true; 133 } 134 135 return Changed; 136} 137 138// performScalarRepl - This algorithm is a simple worklist driven algorithm, 139// which runs on all of the malloc/alloca instructions in the function, removing 140// them if they are only used by getelementptr instructions. 141// 142bool SROA::performScalarRepl(Function &F) { 143 std::vector<AllocationInst*> WorkList; 144 145 // Scan the entry basic block, adding any alloca's and mallocs to the worklist 146 BasicBlock &BB = F.getEntryBlock(); 147 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 148 if (AllocationInst *A = dyn_cast<AllocationInst>(I)) 149 WorkList.push_back(A); 150 151 // Process the worklist 152 bool Changed = false; 153 while (!WorkList.empty()) { 154 AllocationInst *AI = WorkList.back(); 155 WorkList.pop_back(); 156 157 // Handle dead allocas trivially. These can be formed by SROA'ing arrays 158 // with unused elements. 159 if (AI->use_empty()) { 160 AI->eraseFromParent(); 161 continue; 162 } 163 164 // If we can turn this aggregate value (potentially with casts) into a 165 // simple scalar value that can be mem2reg'd into a register value. 166 bool IsNotTrivial = false; 167 if (const Type *ActualType = CanConvertToScalar(AI, IsNotTrivial)) 168 if (IsNotTrivial && ActualType != Type::VoidTy) { 169 ConvertToScalar(AI, ActualType); 170 Changed = true; 171 continue; 172 } 173 174 // Check to see if we can perform the core SROA transformation. We cannot 175 // transform the allocation instruction if it is an array allocation 176 // (allocations OF arrays are ok though), and an allocation of a scalar 177 // value cannot be decomposed at all. 178 if (!AI->isArrayAllocation() && 179 (isa<StructType>(AI->getAllocatedType()) || 180 isa<ArrayType>(AI->getAllocatedType()))) { 181 // Check that all of the users of the allocation are capable of being 182 // transformed. 183 switch (isSafeAllocaToScalarRepl(AI)) { 184 default: assert(0 && "Unexpected value!"); 185 case 0: // Not safe to scalar replace. 186 break; 187 case 1: // Safe, but requires cleanup/canonicalizations first 188 CanonicalizeAllocaUsers(AI); 189 // FALL THROUGH. 190 case 3: // Safe to scalar replace. 191 DoScalarReplacement(AI, WorkList); 192 Changed = true; 193 continue; 194 } 195 } 196 197 // Check to see if this allocation is only modified by a memcpy/memmove from 198 // a constant global. If this is the case, we can change all users to use 199 // the constant global instead. This is commonly produced by the CFE by 200 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 201 // is only subsequently read. 202 if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { 203 DOUT << "Found alloca equal to global: " << *AI; 204 DOUT << " memcpy = " << *TheCopy; 205 Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); 206 AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); 207 TheCopy->eraseFromParent(); // Don't mutate the global. 208 AI->eraseFromParent(); 209 ++NumGlobals; 210 Changed = true; 211 continue; 212 } 213 214 // Otherwise, couldn't process this. 215 } 216 217 return Changed; 218} 219 220/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 221/// predicate, do SROA now. 222void SROA::DoScalarReplacement(AllocationInst *AI, 223 std::vector<AllocationInst*> &WorkList) { 224 DOUT << "Found inst to SROA: " << *AI; 225 SmallVector<AllocaInst*, 32> ElementAllocas; 226 if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 227 ElementAllocas.reserve(ST->getNumContainedTypes()); 228 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 229 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 230 AI->getAlignment(), 231 AI->getName() + "." + utostr(i), AI); 232 ElementAllocas.push_back(NA); 233 WorkList.push_back(NA); // Add to worklist for recursive processing 234 } 235 } else { 236 const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 237 ElementAllocas.reserve(AT->getNumElements()); 238 const Type *ElTy = AT->getElementType(); 239 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 240 AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), 241 AI->getName() + "." + utostr(i), AI); 242 ElementAllocas.push_back(NA); 243 WorkList.push_back(NA); // Add to worklist for recursive processing 244 } 245 } 246 247 // Now that we have created the alloca instructions that we want to use, 248 // expand the getelementptr instructions to use them. 249 // 250 while (!AI->use_empty()) { 251 Instruction *User = cast<Instruction>(AI->use_back()); 252 if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { 253 RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); 254 BCInst->eraseFromParent(); 255 continue; 256 } 257 258 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); 259 // We now know that the GEP is of the form: GEP <ptr>, 0, <cst> 260 unsigned Idx = 261 (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 262 263 assert(Idx < ElementAllocas.size() && "Index out of range?"); 264 AllocaInst *AllocaToUse = ElementAllocas[Idx]; 265 266 Value *RepValue; 267 if (GEPI->getNumOperands() == 3) { 268 // Do not insert a new getelementptr instruction with zero indices, only 269 // to have it optimized out later. 270 RepValue = AllocaToUse; 271 } else { 272 // We are indexing deeply into the structure, so we still need a 273 // getelement ptr instruction to finish the indexing. This may be 274 // expanded itself once the worklist is rerun. 275 // 276 SmallVector<Value*, 8> NewArgs; 277 NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); 278 NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); 279 RepValue = new GetElementPtrInst(AllocaToUse, &NewArgs[0], 280 NewArgs.size(), "", GEPI); 281 RepValue->takeName(GEPI); 282 } 283 284 // If this GEP is to the start of the aggregate, check for memcpys. 285 if (Idx == 0) { 286 bool IsStartOfAggregateGEP = true; 287 for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) { 288 if (!isa<ConstantInt>(GEPI->getOperand(i))) { 289 IsStartOfAggregateGEP = false; 290 break; 291 } 292 if (!cast<ConstantInt>(GEPI->getOperand(i))->isZero()) { 293 IsStartOfAggregateGEP = false; 294 break; 295 } 296 } 297 298 if (IsStartOfAggregateGEP) 299 RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); 300 } 301 302 303 // Move all of the users over to the new GEP. 304 GEPI->replaceAllUsesWith(RepValue); 305 // Delete the old GEP 306 GEPI->eraseFromParent(); 307 } 308 309 // Finally, delete the Alloca instruction 310 AI->eraseFromParent(); 311 NumReplaced++; 312} 313 314 315/// isSafeElementUse - Check to see if this use is an allowed use for a 316/// getelementptr instruction of an array aggregate allocation. isFirstElt 317/// indicates whether Ptr is known to the start of the aggregate. 318/// 319int SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI) { 320 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 321 I != E; ++I) { 322 Instruction *User = cast<Instruction>(*I); 323 switch (User->getOpcode()) { 324 case Instruction::Load: break; 325 case Instruction::Store: 326 // Store is ok if storing INTO the pointer, not storing the pointer 327 if (User->getOperand(0) == Ptr) return 0; 328 break; 329 case Instruction::GetElementPtr: { 330 GetElementPtrInst *GEP = cast<GetElementPtrInst>(User); 331 bool AreAllZeroIndices = isFirstElt; 332 if (GEP->getNumOperands() > 1) { 333 if (!isa<ConstantInt>(GEP->getOperand(1)) || 334 !cast<ConstantInt>(GEP->getOperand(1))->isZero()) 335 return 0; // Using pointer arithmetic to navigate the array. 336 337 if (AreAllZeroIndices) { 338 for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) { 339 if (!isa<ConstantInt>(GEP->getOperand(i)) || 340 !cast<ConstantInt>(GEP->getOperand(i))->isZero()) { 341 AreAllZeroIndices = false; 342 break; 343 } 344 } 345 } 346 } 347 if (!isSafeElementUse(GEP, AreAllZeroIndices, AI)) return 0; 348 break; 349 } 350 case Instruction::BitCast: 351 if (isFirstElt && 352 isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI)) 353 break; 354 DOUT << " Transformation preventing inst: " << *User; 355 return 0; 356 case Instruction::Call: 357 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 358 if (isFirstElt && isSafeMemIntrinsicOnAllocation(MI, AI)) 359 break; 360 } 361 DOUT << " Transformation preventing inst: " << *User; 362 return 0; 363 default: 364 DOUT << " Transformation preventing inst: " << *User; 365 return 0; 366 } 367 } 368 return 3; // All users look ok :) 369} 370 371/// AllUsersAreLoads - Return true if all users of this value are loads. 372static bool AllUsersAreLoads(Value *Ptr) { 373 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 374 I != E; ++I) 375 if (cast<Instruction>(*I)->getOpcode() != Instruction::Load) 376 return false; 377 return true; 378} 379 380/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an 381/// aggregate allocation. 382/// 383int SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI) { 384 if (BitCastInst *C = dyn_cast<BitCastInst>(User)) 385 return isSafeUseOfBitCastedAllocation(C, AI) ? 3 : 0; 386 if (!isa<GetElementPtrInst>(User)) return 0; 387 388 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); 389 gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); 390 391 // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>". 392 if (I == E || 393 I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) 394 return 0; 395 396 ++I; 397 if (I == E) return 0; // ran out of GEP indices?? 398 399 bool IsAllZeroIndices = true; 400 401 // If this is a use of an array allocation, do a bit more checking for sanity. 402 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 403 uint64_t NumElements = AT->getNumElements(); 404 405 if (ConstantInt *Idx = dyn_cast<ConstantInt>(I.getOperand())) { 406 IsAllZeroIndices &= Idx->isZero(); 407 408 // Check to make sure that index falls within the array. If not, 409 // something funny is going on, so we won't do the optimization. 410 // 411 if (Idx->getZExtValue() >= NumElements) 412 return 0; 413 414 // We cannot scalar repl this level of the array unless any array 415 // sub-indices are in-range constants. In particular, consider: 416 // A[0][i]. We cannot know that the user isn't doing invalid things like 417 // allowing i to index an out-of-range subscript that accesses A[1]. 418 // 419 // Scalar replacing *just* the outer index of the array is probably not 420 // going to be a win anyway, so just give up. 421 for (++I; I != E && (isa<ArrayType>(*I) || isa<VectorType>(*I)); ++I) { 422 uint64_t NumElements; 423 if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*I)) 424 NumElements = SubArrayTy->getNumElements(); 425 else 426 NumElements = cast<VectorType>(*I)->getNumElements(); 427 428 ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand()); 429 if (!IdxVal) return 0; 430 if (IdxVal->getZExtValue() >= NumElements) 431 return 0; 432 IsAllZeroIndices &= IdxVal->isZero(); 433 } 434 435 } else { 436 IsAllZeroIndices = 0; 437 438 // If this is an array index and the index is not constant, we cannot 439 // promote... that is unless the array has exactly one or two elements in 440 // it, in which case we CAN promote it, but we have to canonicalize this 441 // out if this is the only problem. 442 if ((NumElements == 1 || NumElements == 2) && 443 AllUsersAreLoads(GEPI)) 444 return 1; // Canonicalization required! 445 return 0; 446 } 447 } 448 449 // If there are any non-simple uses of this getelementptr, make sure to reject 450 // them. 451 return isSafeElementUse(GEPI, IsAllZeroIndices, AI); 452} 453 454/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory 455/// intrinsic can be promoted by SROA. At this point, we know that the operand 456/// of the memintrinsic is a pointer to the beginning of the allocation. 457bool SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI){ 458 // If not constant length, give up. 459 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 460 if (!Length) return false; 461 462 // If not the whole aggregate, give up. 463 const TargetData &TD = getAnalysis<TargetData>(); 464 if (Length->getZExtValue() != TD.getTypeSize(AI->getType()->getElementType())) 465 return false; 466 467 // We only know about memcpy/memset/memmove. 468 if (!isa<MemCpyInst>(MI) && !isa<MemSetInst>(MI) && !isa<MemMoveInst>(MI)) 469 return false; 470 // Otherwise, we can transform it. 471 return true; 472} 473 474/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast 475/// are 476bool SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI) { 477 for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); 478 UI != E; ++UI) { 479 if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) { 480 if (!isSafeUseOfBitCastedAllocation(BCU, AI)) 481 return false; 482 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { 483 if (!isSafeMemIntrinsicOnAllocation(MI, AI)) 484 return false; 485 } else { 486 return false; 487 } 488 } 489 return true; 490} 491 492/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes 493/// to its first element. Transform users of the cast to use the new values 494/// instead. 495void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, 496 SmallVector<AllocaInst*, 32> &NewElts) { 497 Constant *Zero = Constant::getNullValue(Type::Int32Ty); 498 const TargetData &TD = getAnalysis<TargetData>(); 499 500 Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); 501 while (UI != UE) { 502 if (BitCastInst *BCU = dyn_cast<BitCastInst>(*UI)) { 503 RewriteBitCastUserOfAlloca(BCU, AI, NewElts); 504 ++UI; 505 BCU->eraseFromParent(); 506 continue; 507 } 508 509 // Otherwise, must be memcpy/memmove/memset of the entire aggregate. Split 510 // into one per element. 511 MemIntrinsic *MI = dyn_cast<MemIntrinsic>(*UI); 512 513 // If it's not a mem intrinsic, it must be some other user of a gep of the 514 // first pointer. Just leave these alone. 515 if (!MI) { 516 ++UI; 517 continue; 518 } 519 520 // If this is a memcpy/memmove, construct the other pointer as the 521 // appropriate type. 522 Value *OtherPtr = 0; 523 if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(MI)) { 524 if (BCInst == MCI->getRawDest()) 525 OtherPtr = MCI->getRawSource(); 526 else { 527 assert(BCInst == MCI->getRawSource()); 528 OtherPtr = MCI->getRawDest(); 529 } 530 } else if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { 531 if (BCInst == MMI->getRawDest()) 532 OtherPtr = MMI->getRawSource(); 533 else { 534 assert(BCInst == MMI->getRawSource()); 535 OtherPtr = MMI->getRawDest(); 536 } 537 } 538 539 // If there is an other pointer, we want to convert it to the same pointer 540 // type as AI has, so we can GEP through it. 541 if (OtherPtr) { 542 // It is likely that OtherPtr is a bitcast, if so, remove it. 543 if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) 544 OtherPtr = BC->getOperand(0); 545 if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) 546 if (BCE->getOpcode() == Instruction::BitCast) 547 OtherPtr = BCE->getOperand(0); 548 549 // If the pointer is not the right type, insert a bitcast to the right 550 // type. 551 if (OtherPtr->getType() != AI->getType()) 552 OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), 553 MI); 554 } 555 556 // Process each element of the aggregate. 557 Value *TheFn = MI->getOperand(0); 558 const Type *BytePtrTy = MI->getRawDest()->getType(); 559 bool SROADest = MI->getRawDest() == BCInst; 560 561 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 562 // If this is a memcpy/memmove, emit a GEP of the other element address. 563 Value *OtherElt = 0; 564 if (OtherPtr) { 565 OtherElt = new GetElementPtrInst(OtherPtr, Zero, 566 ConstantInt::get(Type::Int32Ty, i), 567 OtherPtr->getNameStr()+"."+utostr(i), 568 MI); 569 } 570 571 Value *EltPtr = NewElts[i]; 572 const Type *EltTy =cast<PointerType>(EltPtr->getType())->getElementType(); 573 574 // If we got down to a scalar, insert a load or store as appropriate. 575 if (EltTy->isFirstClassType()) { 576 if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { 577 Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp", 578 MI); 579 new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI); 580 continue; 581 } else { 582 assert(isa<MemSetInst>(MI)); 583 584 // If the stored element is zero (common case), just store a null 585 // constant. 586 Constant *StoreVal; 587 if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { 588 if (CI->isZero()) { 589 StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> 590 } else { 591 // If EltTy is a packed type, get the element type. 592 const Type *ValTy = EltTy; 593 if (const VectorType *VTy = dyn_cast<VectorType>(ValTy)) 594 ValTy = VTy->getElementType(); 595 596 // Construct an integer with the right value. 597 unsigned EltSize = TD.getTypeSize(ValTy); 598 APInt OneVal(EltSize*8, CI->getZExtValue()); 599 APInt TotalVal(OneVal); 600 // Set each byte. 601 for (unsigned i = 0; i != EltSize-1; ++i) { 602 TotalVal = TotalVal.shl(8); 603 TotalVal |= OneVal; 604 } 605 606 // Convert the integer value to the appropriate type. 607 StoreVal = ConstantInt::get(TotalVal); 608 if (isa<PointerType>(ValTy)) 609 StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); 610 else if (ValTy->isFloatingPoint()) 611 StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); 612 assert(StoreVal->getType() == ValTy && "Type mismatch!"); 613 614 // If the requested value was a vector constant, create it. 615 if (EltTy != ValTy) { 616 unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); 617 SmallVector<Constant*, 16> Elts(NumElts, StoreVal); 618 StoreVal = ConstantVector::get(&Elts[0], NumElts); 619 } 620 } 621 new StoreInst(StoreVal, EltPtr, MI); 622 continue; 623 } 624 // Otherwise, if we're storing a byte variable, use a memset call for 625 // this element. 626 } 627 } 628 629 // Cast the element pointer to BytePtrTy. 630 if (EltPtr->getType() != BytePtrTy) 631 EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); 632 633 // Cast the other pointer (if we have one) to BytePtrTy. 634 if (OtherElt && OtherElt->getType() != BytePtrTy) 635 OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), 636 MI); 637 638 unsigned EltSize = TD.getTypeSize(EltTy); 639 640 // Finally, insert the meminst for this element. 641 if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { 642 Value *Ops[] = { 643 SROADest ? EltPtr : OtherElt, // Dest ptr 644 SROADest ? OtherElt : EltPtr, // Src ptr 645 ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 646 Zero // Align 647 }; 648 new CallInst(TheFn, Ops, 4, "", MI); 649 } else { 650 assert(isa<MemSetInst>(MI)); 651 Value *Ops[] = { 652 EltPtr, MI->getOperand(2), // Dest, Value, 653 ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 654 Zero // Align 655 }; 656 new CallInst(TheFn, Ops, 4, "", MI); 657 } 658 } 659 660 // Finally, MI is now dead, as we've modified its actions to occur on all of 661 // the elements of the aggregate. 662 ++UI; 663 MI->eraseFromParent(); 664 } 665} 666 667 668/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of 669/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, 670/// or 1 if safe after canonicalization has been performed. 671/// 672int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { 673 // Loop over the use list of the alloca. We can only transform it if all of 674 // the users are safe to transform. 675 // 676 int isSafe = 3; 677 for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); 678 I != E; ++I) { 679 isSafe &= isSafeUseOfAllocation(cast<Instruction>(*I), AI); 680 if (isSafe == 0) { 681 DOUT << "Cannot transform: " << *AI << " due to user: " << **I; 682 return 0; 683 } 684 } 685 // If we require cleanup, isSafe is now 1, otherwise it is 3. 686 return isSafe; 687} 688 689/// CanonicalizeAllocaUsers - If SROA reported that it can promote the specified 690/// allocation, but only if cleaned up, perform the cleanups required. 691void SROA::CanonicalizeAllocaUsers(AllocationInst *AI) { 692 // At this point, we know that the end result will be SROA'd and promoted, so 693 // we can insert ugly code if required so long as sroa+mem2reg will clean it 694 // up. 695 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 696 UI != E; ) { 697 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI++); 698 if (!GEPI) continue; 699 gep_type_iterator I = gep_type_begin(GEPI); 700 ++I; 701 702 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 703 uint64_t NumElements = AT->getNumElements(); 704 705 if (!isa<ConstantInt>(I.getOperand())) { 706 if (NumElements == 1) { 707 GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty)); 708 } else { 709 assert(NumElements == 2 && "Unhandled case!"); 710 // All users of the GEP must be loads. At each use of the GEP, insert 711 // two loads of the appropriate indexed GEP and select between them. 712 Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), 713 Constant::getNullValue(I.getOperand()->getType()), 714 "isone", GEPI); 715 // Insert the new GEP instructions, which are properly indexed. 716 SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); 717 Indices[1] = Constant::getNullValue(Type::Int32Ty); 718 Value *ZeroIdx = new GetElementPtrInst(GEPI->getOperand(0), 719 &Indices[0], Indices.size(), 720 GEPI->getName()+".0", GEPI); 721 Indices[1] = ConstantInt::get(Type::Int32Ty, 1); 722 Value *OneIdx = new GetElementPtrInst(GEPI->getOperand(0), 723 &Indices[0], Indices.size(), 724 GEPI->getName()+".1", GEPI); 725 // Replace all loads of the variable index GEP with loads from both 726 // indexes and a select. 727 while (!GEPI->use_empty()) { 728 LoadInst *LI = cast<LoadInst>(GEPI->use_back()); 729 Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); 730 Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); 731 Value *R = new SelectInst(IsOne, One, Zero, LI->getName(), LI); 732 LI->replaceAllUsesWith(R); 733 LI->eraseFromParent(); 734 } 735 GEPI->eraseFromParent(); 736 } 737 } 738 } 739 } 740} 741 742/// MergeInType - Add the 'In' type to the accumulated type so far. If the 743/// types are incompatible, return true, otherwise update Accum and return 744/// false. 745/// 746/// There are three cases we handle here: 747/// 1) An effectively-integer union, where the pieces are stored into as 748/// smaller integers (common with byte swap and other idioms). 749/// 2) A union of vector types of the same size and potentially its elements. 750/// Here we turn element accesses into insert/extract element operations. 751/// 3) A union of scalar types, such as int/float or int/pointer. Here we 752/// merge together into integers, allowing the xform to work with #1 as 753/// well. 754static bool MergeInType(const Type *In, const Type *&Accum, 755 const TargetData &TD) { 756 // If this is our first type, just use it. 757 const VectorType *PTy; 758 if (Accum == Type::VoidTy || In == Accum) { 759 Accum = In; 760 } else if (In == Type::VoidTy) { 761 // Noop. 762 } else if (In->isInteger() && Accum->isInteger()) { // integer union. 763 // Otherwise pick whichever type is larger. 764 if (cast<IntegerType>(In)->getBitWidth() > 765 cast<IntegerType>(Accum)->getBitWidth()) 766 Accum = In; 767 } else if (isa<PointerType>(In) && isa<PointerType>(Accum)) { 768 // Pointer unions just stay as one of the pointers. 769 } else if (isa<VectorType>(In) || isa<VectorType>(Accum)) { 770 if ((PTy = dyn_cast<VectorType>(Accum)) && 771 PTy->getElementType() == In) { 772 // Accum is a vector, and we are accessing an element: ok. 773 } else if ((PTy = dyn_cast<VectorType>(In)) && 774 PTy->getElementType() == Accum) { 775 // In is a vector, and accum is an element: ok, remember In. 776 Accum = In; 777 } else if ((PTy = dyn_cast<VectorType>(In)) && isa<VectorType>(Accum) && 778 PTy->getBitWidth() == cast<VectorType>(Accum)->getBitWidth()) { 779 // Two vectors of the same size: keep Accum. 780 } else { 781 // Cannot insert an short into a <4 x int> or handle 782 // <2 x int> -> <4 x int> 783 return true; 784 } 785 } else { 786 // Pointer/FP/Integer unions merge together as integers. 787 switch (Accum->getTypeID()) { 788 case Type::PointerTyID: Accum = TD.getIntPtrType(); break; 789 case Type::FloatTyID: Accum = Type::Int32Ty; break; 790 case Type::DoubleTyID: Accum = Type::Int64Ty; break; 791 default: 792 assert(Accum->isInteger() && "Unknown FP type!"); 793 break; 794 } 795 796 switch (In->getTypeID()) { 797 case Type::PointerTyID: In = TD.getIntPtrType(); break; 798 case Type::FloatTyID: In = Type::Int32Ty; break; 799 case Type::DoubleTyID: In = Type::Int64Ty; break; 800 default: 801 assert(In->isInteger() && "Unknown FP type!"); 802 break; 803 } 804 return MergeInType(In, Accum, TD); 805 } 806 return false; 807} 808 809/// getUIntAtLeastAsBitAs - Return an unsigned integer type that is at least 810/// as big as the specified type. If there is no suitable type, this returns 811/// null. 812const Type *getUIntAtLeastAsBitAs(unsigned NumBits) { 813 if (NumBits > 64) return 0; 814 if (NumBits > 32) return Type::Int64Ty; 815 if (NumBits > 16) return Type::Int32Ty; 816 if (NumBits > 8) return Type::Int16Ty; 817 return Type::Int8Ty; 818} 819 820/// CanConvertToScalar - V is a pointer. If we can convert the pointee to a 821/// single scalar integer type, return that type. Further, if the use is not 822/// a completely trivial use that mem2reg could promote, set IsNotTrivial. If 823/// there are no uses of this pointer, return Type::VoidTy to differentiate from 824/// failure. 825/// 826const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) { 827 const Type *UsedType = Type::VoidTy; // No uses, no forced type. 828 const TargetData &TD = getAnalysis<TargetData>(); 829 const PointerType *PTy = cast<PointerType>(V->getType()); 830 831 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 832 Instruction *User = cast<Instruction>(*UI); 833 834 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 835 if (MergeInType(LI->getType(), UsedType, TD)) 836 return 0; 837 838 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 839 // Storing the pointer, not into the value? 840 if (SI->getOperand(0) == V) return 0; 841 842 // NOTE: We could handle storing of FP imms into integers here! 843 844 if (MergeInType(SI->getOperand(0)->getType(), UsedType, TD)) 845 return 0; 846 } else if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { 847 IsNotTrivial = true; 848 const Type *SubTy = CanConvertToScalar(CI, IsNotTrivial); 849 if (!SubTy || MergeInType(SubTy, UsedType, TD)) return 0; 850 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { 851 // Check to see if this is stepping over an element: GEP Ptr, int C 852 if (GEP->getNumOperands() == 2 && isa<ConstantInt>(GEP->getOperand(1))) { 853 unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue(); 854 unsigned ElSize = TD.getTypeSize(PTy->getElementType()); 855 unsigned BitOffset = Idx*ElSize*8; 856 if (BitOffset > 64 || !isPowerOf2_32(ElSize)) return 0; 857 858 IsNotTrivial = true; 859 const Type *SubElt = CanConvertToScalar(GEP, IsNotTrivial); 860 if (SubElt == 0) return 0; 861 if (SubElt != Type::VoidTy && SubElt->isInteger()) { 862 const Type *NewTy = 863 getUIntAtLeastAsBitAs(TD.getTypeSize(SubElt)*8+BitOffset); 864 if (NewTy == 0 || MergeInType(NewTy, UsedType, TD)) return 0; 865 continue; 866 } 867 } else if (GEP->getNumOperands() == 3 && 868 isa<ConstantInt>(GEP->getOperand(1)) && 869 isa<ConstantInt>(GEP->getOperand(2)) && 870 cast<ConstantInt>(GEP->getOperand(1))->isZero()) { 871 // We are stepping into an element, e.g. a structure or an array: 872 // GEP Ptr, int 0, uint C 873 const Type *AggTy = PTy->getElementType(); 874 unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 875 876 if (const ArrayType *ATy = dyn_cast<ArrayType>(AggTy)) { 877 if (Idx >= ATy->getNumElements()) return 0; // Out of range. 878 } else if (const VectorType *VectorTy = dyn_cast<VectorType>(AggTy)) { 879 // Getting an element of the packed vector. 880 if (Idx >= VectorTy->getNumElements()) return 0; // Out of range. 881 882 // Merge in the vector type. 883 if (MergeInType(VectorTy, UsedType, TD)) return 0; 884 885 const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial); 886 if (SubTy == 0) return 0; 887 888 if (SubTy != Type::VoidTy && MergeInType(SubTy, UsedType, TD)) 889 return 0; 890 891 // We'll need to change this to an insert/extract element operation. 892 IsNotTrivial = true; 893 continue; // Everything looks ok 894 895 } else if (isa<StructType>(AggTy)) { 896 // Structs are always ok. 897 } else { 898 return 0; 899 } 900 const Type *NTy = getUIntAtLeastAsBitAs(TD.getTypeSize(AggTy)*8); 901 if (NTy == 0 || MergeInType(NTy, UsedType, TD)) return 0; 902 const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial); 903 if (SubTy == 0) return 0; 904 if (SubTy != Type::VoidTy && MergeInType(SubTy, UsedType, TD)) 905 return 0; 906 continue; // Everything looks ok 907 } 908 return 0; 909 } else { 910 // Cannot handle this! 911 return 0; 912 } 913 } 914 915 return UsedType; 916} 917 918/// ConvertToScalar - The specified alloca passes the CanConvertToScalar 919/// predicate and is non-trivial. Convert it to something that can be trivially 920/// promoted into a register by mem2reg. 921void SROA::ConvertToScalar(AllocationInst *AI, const Type *ActualTy) { 922 DOUT << "CONVERT TO SCALAR: " << *AI << " TYPE = " 923 << *ActualTy << "\n"; 924 ++NumConverted; 925 926 BasicBlock *EntryBlock = AI->getParent(); 927 assert(EntryBlock == &EntryBlock->getParent()->getEntryBlock() && 928 "Not in the entry block!"); 929 EntryBlock->getInstList().remove(AI); // Take the alloca out of the program. 930 931 // Create and insert the alloca. 932 AllocaInst *NewAI = new AllocaInst(ActualTy, 0, AI->getName(), 933 EntryBlock->begin()); 934 ConvertUsesToScalar(AI, NewAI, 0); 935 delete AI; 936} 937 938 939/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca 940/// directly. This happens when we are converting an "integer union" to a 941/// single integer scalar, or when we are converting a "vector union" to a 942/// vector with insert/extractelement instructions. 943/// 944/// Offset is an offset from the original alloca, in bits that need to be 945/// shifted to the right. By the end of this, there should be no uses of Ptr. 946void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) { 947 const TargetData &TD = getAnalysis<TargetData>(); 948 while (!Ptr->use_empty()) { 949 Instruction *User = cast<Instruction>(Ptr->use_back()); 950 951 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 952 // The load is a bit extract from NewAI shifted right by Offset bits. 953 Value *NV = new LoadInst(NewAI, LI->getName(), LI); 954 if (NV->getType() == LI->getType()) { 955 // We win, no conversion needed. 956 } else if (const VectorType *PTy = dyn_cast<VectorType>(NV->getType())) { 957 // If the result alloca is a vector type, this is either an element 958 // access or a bitcast to another vector type. 959 if (isa<VectorType>(LI->getType())) { 960 NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI); 961 } else { 962 // Must be an element access. 963 unsigned Elt = Offset/(TD.getTypeSize(PTy->getElementType())*8); 964 NV = new ExtractElementInst( 965 NV, ConstantInt::get(Type::Int32Ty, Elt), "tmp", LI); 966 } 967 } else if (isa<PointerType>(NV->getType())) { 968 assert(isa<PointerType>(LI->getType())); 969 // Must be ptr->ptr cast. Anything else would result in NV being 970 // an integer. 971 NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI); 972 } else { 973 const IntegerType *NTy = cast<IntegerType>(NV->getType()); 974 unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType()); 975 976 // If this is a big-endian system and the load is narrower than the 977 // full alloca type, we need to do a shift to get the right bits. 978 int ShAmt = 0; 979 if (TD.isBigEndian()) { 980 ShAmt = NTy->getBitWidth()-LIBitWidth-Offset; 981 } else { 982 ShAmt = Offset; 983 } 984 985 // Note: we support negative bitwidths (with shl) which are not defined. 986 // We do this to support (f.e.) loads off the end of a structure where 987 // only some bits are used. 988 if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) 989 NV = BinaryOperator::createLShr(NV, 990 ConstantInt::get(NV->getType(),ShAmt), 991 LI->getName(), LI); 992 else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) 993 NV = BinaryOperator::createShl(NV, 994 ConstantInt::get(NV->getType(),-ShAmt), 995 LI->getName(), LI); 996 997 // Finally, unconditionally truncate the integer to the right width. 998 if (LIBitWidth < NTy->getBitWidth()) 999 NV = new TruncInst(NV, IntegerType::get(LIBitWidth), 1000 LI->getName(), LI); 1001 1002 // If the result is an integer, this is a trunc or bitcast. 1003 if (isa<IntegerType>(LI->getType())) { 1004 assert(NV->getType() == LI->getType() && "Truncate wasn't enough?"); 1005 } else if (LI->getType()->isFloatingPoint()) { 1006 // Just do a bitcast, we know the sizes match up. 1007 NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI); 1008 } else { 1009 // Otherwise must be a pointer. 1010 NV = new IntToPtrInst(NV, LI->getType(), LI->getName(), LI); 1011 } 1012 } 1013 LI->replaceAllUsesWith(NV); 1014 LI->eraseFromParent(); 1015 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1016 assert(SI->getOperand(0) != Ptr && "Consistency error!"); 1017 1018 // Convert the stored type to the actual type, shift it left to insert 1019 // then 'or' into place. 1020 Value *SV = SI->getOperand(0); 1021 const Type *AllocaType = NewAI->getType()->getElementType(); 1022 if (SV->getType() == AllocaType) { 1023 // All is well. 1024 } else if (const VectorType *PTy = dyn_cast<VectorType>(AllocaType)) { 1025 Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", SI); 1026 1027 // If the result alloca is a vector type, this is either an element 1028 // access or a bitcast to another vector type. 1029 if (isa<VectorType>(SV->getType())) { 1030 SV = new BitCastInst(SV, AllocaType, SV->getName(), SI); 1031 } else { 1032 // Must be an element insertion. 1033 unsigned Elt = Offset/(TD.getTypeSize(PTy->getElementType())*8); 1034 SV = new InsertElementInst(Old, SV, 1035 ConstantInt::get(Type::Int32Ty, Elt), 1036 "tmp", SI); 1037 } 1038 } else if (isa<PointerType>(AllocaType)) { 1039 // If the alloca type is a pointer, then all the elements must be 1040 // pointers. 1041 if (SV->getType() != AllocaType) 1042 SV = new BitCastInst(SV, AllocaType, SV->getName(), SI); 1043 } else { 1044 Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", SI); 1045 1046 // If SV is a float, convert it to the appropriate integer type. 1047 // If it is a pointer, do the same, and also handle ptr->ptr casts 1048 // here. 1049 unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType()); 1050 unsigned DestWidth = AllocaType->getPrimitiveSizeInBits(); 1051 if (SV->getType()->isFloatingPoint()) 1052 SV = new BitCastInst(SV, IntegerType::get(SrcWidth), 1053 SV->getName(), SI); 1054 else if (isa<PointerType>(SV->getType())) 1055 SV = new PtrToIntInst(SV, TD.getIntPtrType(), SV->getName(), SI); 1056 1057 // Always zero extend the value if needed. 1058 if (SV->getType() != AllocaType) 1059 SV = new ZExtInst(SV, AllocaType, SV->getName(), SI); 1060 1061 // If this is a big-endian system and the store is narrower than the 1062 // full alloca type, we need to do a shift to get the right bits. 1063 int ShAmt = 0; 1064 if (TD.isBigEndian()) { 1065 ShAmt = DestWidth-SrcWidth-Offset; 1066 } else { 1067 ShAmt = Offset; 1068 } 1069 1070 // Note: we support negative bitwidths (with shr) which are not defined. 1071 // We do this to support (f.e.) stores off the end of a structure where 1072 // only some bits in the structure are set. 1073 APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); 1074 if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { 1075 SV = BinaryOperator::createShl(SV, 1076 ConstantInt::get(SV->getType(), ShAmt), 1077 SV->getName(), SI); 1078 Mask <<= ShAmt; 1079 } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { 1080 SV = BinaryOperator::createLShr(SV, 1081 ConstantInt::get(SV->getType(),-ShAmt), 1082 SV->getName(), SI); 1083 Mask = Mask.lshr(ShAmt); 1084 } 1085 1086 // Mask out the bits we are about to insert from the old value, and or 1087 // in the new bits. 1088 if (SrcWidth != DestWidth) { 1089 assert(DestWidth > SrcWidth); 1090 Old = BinaryOperator::createAnd(Old, ConstantInt::get(~Mask), 1091 Old->getName()+".mask", SI); 1092 SV = BinaryOperator::createOr(Old, SV, SV->getName()+".ins", SI); 1093 } 1094 } 1095 new StoreInst(SV, NewAI, SI); 1096 SI->eraseFromParent(); 1097 1098 } else if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { 1099 ConvertUsesToScalar(CI, NewAI, Offset); 1100 CI->eraseFromParent(); 1101 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { 1102 const PointerType *AggPtrTy = 1103 cast<PointerType>(GEP->getOperand(0)->getType()); 1104 const TargetData &TD = getAnalysis<TargetData>(); 1105 unsigned AggSizeInBits = TD.getTypeSize(AggPtrTy->getElementType())*8; 1106 1107 // Check to see if this is stepping over an element: GEP Ptr, int C 1108 unsigned NewOffset = Offset; 1109 if (GEP->getNumOperands() == 2) { 1110 unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue(); 1111 unsigned BitOffset = Idx*AggSizeInBits; 1112 1113 NewOffset += BitOffset; 1114 } else if (GEP->getNumOperands() == 3) { 1115 // We know that operand #2 is zero. 1116 unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 1117 const Type *AggTy = AggPtrTy->getElementType(); 1118 if (const SequentialType *SeqTy = dyn_cast<SequentialType>(AggTy)) { 1119 unsigned ElSizeBits = TD.getTypeSize(SeqTy->getElementType())*8; 1120 1121 NewOffset += ElSizeBits*Idx; 1122 } else if (const StructType *STy = dyn_cast<StructType>(AggTy)) { 1123 unsigned EltBitOffset = 1124 TD.getStructLayout(STy)->getElementOffset(Idx)*8; 1125 1126 NewOffset += EltBitOffset; 1127 } else { 1128 assert(0 && "Unsupported operation!"); 1129 abort(); 1130 } 1131 } else { 1132 assert(0 && "Unsupported operation!"); 1133 abort(); 1134 } 1135 ConvertUsesToScalar(GEP, NewAI, NewOffset); 1136 GEP->eraseFromParent(); 1137 } else { 1138 assert(0 && "Unsupported operation!"); 1139 abort(); 1140 } 1141 } 1142} 1143 1144 1145/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to 1146/// some part of a constant global variable. This intentionally only accepts 1147/// constant expressions because we don't can't rewrite arbitrary instructions. 1148static bool PointsToConstantGlobal(Value *V) { 1149 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 1150 return GV->isConstant(); 1151 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) 1152 if (CE->getOpcode() == Instruction::BitCast || 1153 CE->getOpcode() == Instruction::GetElementPtr) 1154 return PointsToConstantGlobal(CE->getOperand(0)); 1155 return false; 1156} 1157 1158/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) 1159/// pointer to an alloca. Ignore any reads of the pointer, return false if we 1160/// see any stores or other unknown uses. If we see pointer arithmetic, keep 1161/// track of whether it moves the pointer (with isOffset) but otherwise traverse 1162/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to 1163/// the alloca, and if the source pointer is a pointer to a constant global, we 1164/// can optimize this. 1165static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, 1166 bool isOffset) { 1167 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 1168 if (isa<LoadInst>(*UI)) { 1169 // Ignore loads, they are always ok. 1170 continue; 1171 } 1172 if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { 1173 // If uses of the bitcast are ok, we are ok. 1174 if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) 1175 return false; 1176 continue; 1177 } 1178 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { 1179 // If the GEP has all zero indices, it doesn't offset the pointer. If it 1180 // doesn't, it does. 1181 if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, 1182 isOffset || !GEP->hasAllZeroIndices())) 1183 return false; 1184 continue; 1185 } 1186 1187 // If this is isn't our memcpy/memmove, reject it as something we can't 1188 // handle. 1189 if (!isa<MemCpyInst>(*UI) && !isa<MemMoveInst>(*UI)) 1190 return false; 1191 1192 // If we already have seen a copy, reject the second one. 1193 if (TheCopy) return false; 1194 1195 // If the pointer has been offset from the start of the alloca, we can't 1196 // safely handle this. 1197 if (isOffset) return false; 1198 1199 // If the memintrinsic isn't using the alloca as the dest, reject it. 1200 if (UI.getOperandNo() != 1) return false; 1201 1202 MemIntrinsic *MI = cast<MemIntrinsic>(*UI); 1203 1204 // If the source of the memcpy/move is not a constant global, reject it. 1205 if (!PointsToConstantGlobal(MI->getOperand(2))) 1206 return false; 1207 1208 // Otherwise, the transform is safe. Remember the copy instruction. 1209 TheCopy = MI; 1210 } 1211 return true; 1212} 1213 1214/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only 1215/// modified by a copy from a constant global. If we can prove this, we can 1216/// replace any uses of the alloca with uses of the global directly. 1217Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) { 1218 Instruction *TheCopy = 0; 1219 if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) 1220 return TheCopy; 1221 return 0; 1222} 1223