//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable. This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer; return false if we
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise
/// traverse the uses. If we see a memcpy/memmove that targets an unoffseted
/// pointer to the alloca, and if the source pointer is a pointer to a constant
/// global, we can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 4> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads; they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer.
        // Otherwise it does.
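        // For example, a GEP of %p with indices (0, 0) still points at the
        // start of the allocation, while any non-zero index moves the pointer.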
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (auto CS = CallSite(I)) {
        // If this is the function being called then we treat it like a load
        // and ignore it.
        if (CS.isCallee(&U))
          continue;

        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we have already seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the copying memcpy/memmove if the
/// specified alloca is only modified by a copy from a constant global. If we
/// can prove this, we can replace any uses of the alloca with uses of the
/// global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
  // Check for array size of 1 (scalar allocation).
  if (!AI.isArrayAllocation()) {
    // i32 1 is the canonical array size for scalar allocations.
    if (AI.getArraySize()->getType()->isIntegerTy(32))
      return nullptr;

    // Canonicalize it.
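    // E.g. an array size of "i64 1" becomes the canonical "i32 1"; the alloca
    // still allocates a single element, only the operand's type changes.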
    Value *V = IC.Builder->getInt32(1);
    AI.setOperand(0, V);
    return &AI;
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
    Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
    AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
    New->setAlignment(AI.getAlignment());

    // Scan to the end of the allocation instructions, to skip over a block of
    // allocas if possible; also skip interleaved debug info.
    //
    BasicBlock::iterator It = New;
    while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
      ++It;

    // Now that It is pointing to the first non-allocation-inst in the block,
    // insert our getelementptr instruction...
    //
    Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
    Value *NullIdx = Constant::getNullValue(IdxTy);
    Value *Idx[2] = {NullIdx, NullIdx};
    Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
    IC.InsertNewInstBefore(GEP, *It);

    // Now make everything use the getelementptr instead of the original
    // allocation.
    return IC.ReplaceInstUsesWith(AI, GEP);
  }

  if (isa<UndefValue>(AI.getArraySize()))
    return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
  if (AI.getArraySize()->getType() != IntPtrTy) {
    Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
    AI.setOperand(0, V);
    return &AI;
  }

  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI))
    return I;

  if (AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together. Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero-sized alloca there is no point in doing an array
      // allocation. This is helpful if the array size is a complicated
      // expression not used elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was already forced to a constant.
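        // If it does start with one, the existing entry-block alloca becomes
        // the single canonical zero-size allocation and this one is folded
        // into it below.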
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
              DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the
        // entry block after ensuring that the address will be aligned enough
        // for both types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove
    // from a constant global whose alignment is equal to or exceeds that of
    // the allocation. If this is the case, we can change all users to use the
    // constant global instead. This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if
    // 'A' is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(
          Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}

/// \brief Helper to combine a load to a new type.
///
/// This just does the work of combining a load to a new type. It handles
/// metadata, etc., and returns the new instruction. The \c NewTy should be
/// the loaded *value* type. This will convert it to a pointer, cast the
/// operand to that pointer type, load it, etc.
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI,
                                      Type *NewTy) {
  Value *Ptr = LI.getPointerOperand();
  unsigned AS = LI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  LI.getAllMetadata(MD);

  LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
      IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
      LI.getAlignment(), LI.getName());
  MDBuilder MDB(NewLoad->getContext());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a load instruction changing *only its
    // type*.
    // The only metadata it makes sense to drop is metadata which is
    // invalidated when the pointer type changes. This should essentially
    // never be the case in LLVM, but we explicitly switch over only known
    // metadata to be conservatively correct. If you are adding metadata to
    // LLVM which pertains to loads, you almost certainly want to add it here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
      // All of these directly apply.
      NewLoad->setMetadata(ID, N);
      break;

    case LLVMContext::MD_nonnull:
      // This only directly applies if the new type is also a pointer.
      if (NewTy->isPointerTy()) {
        NewLoad->setMetadata(ID, N);
        break;
      }
      // If it's integral now, translate it to !range metadata.
      if (NewTy->isIntegerTy()) {
        auto *ITy = cast<IntegerType>(NewTy);
        auto *NullInt = ConstantExpr::getPtrToInt(
            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
        auto *NonNullInt =
            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
        NewLoad->setMetadata(LLVMContext::MD_range,
                             MDB.createRange(NonNullInt, NullInt));
      }
      break;

    case LLVMContext::MD_range:
      // FIXME: It would be nice to propagate this in some way, but the type
      // conversions make it hard. If the new type is a pointer, we could
      // translate it to !nonnull metadata.
      break;
    }
  }
  return NewLoad;
}

/// \brief Combine a store to a new type.
///
/// Returns the newly created store instruction.
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI,
                                         Value *V) {
  Value *Ptr = SI.getPointerOperand();
  unsigned AS = SI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);

  StoreInst *NewStore = IC.Builder->CreateAlignedStore(
      V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
      SI.getAlignment());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a store instruction changing *only its
    // type*. The only metadata it makes sense to drop is metadata which is
    // invalidated when the pointer type changes. This should essentially
    // never be the case in LLVM, but we explicitly switch over only known
    // metadata to be conservatively correct. If you are adding metadata to
    // LLVM which pertains to stores, you almost certainly want to add it
    // here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
      // All of these directly apply.
      NewStore->setMetadata(ID, N);
      break;

    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_nonnull:
    case LLVMContext::MD_range:
      // These don't apply for stores.
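      // (!invariant.load, !nonnull and !range describe the value produced by
      // a load; a store produces no value for them to annotate.)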
      break;
    }
  }

  return NewStore;
}

/// \brief Combine loads to match the type of value their uses expect, after
/// looking through intervening bitcasts.
///
/// The core idea here is that if the result of a load is used in an
/// operation, we should load the type most conducive to that operation. For
/// example, when loading an integer and converting that immediately to a
/// pointer, we should instead directly load a pointer.
///
/// However, this routine must never change the width of a load or the number
/// of loads as that would introduce a semantic change. This combine is
/// expected to be a semantic no-op which just allows loads to more closely
/// model the types of their consuming operations.
///
/// Currently, we also refuse to change the precise type used for an atomic
/// load or a volatile load. This is debatable, and might be reasonable to
/// change later. However, it is risky in case some backend or other part of
/// LLVM is relying on the exact type loaded to select appropriate atomic
/// operations.
static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;

  if (LI.use_empty())
    return nullptr;

  Type *Ty = LI.getType();
  const DataLayout &DL = IC.getDataLayout();

  // Try to canonicalize loads which are only ever stored to operate over
  // integers instead of any other type. We only do this when the loaded type
  // is sized and has a size exactly the same as its store size and the store
  // size is a legal integer width.
  if (!Ty->isIntegerTy() && Ty->isSized() &&
      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
      DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
    if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
          auto *SI = dyn_cast<StoreInst>(U);
          return SI && SI->getPointerOperand() != &LI;
        })) {
      LoadInst *NewLoad = combineLoadToNewType(
          IC, LI,
          Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
      // Replace all the stores with stores of the newly loaded value.
      for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
        auto *SI = cast<StoreInst>(*UI++);
        IC.Builder->SetInsertPoint(SI);
        combineStoreToNewValue(IC, *SI, NewLoad);
        IC.EraseInstFromFunction(*SI);
      }
      assert(LI.use_empty() && "Failed to remove all users of the load!");
      // Return the old load so the combiner can delete it safely.
      return &LI;
    }
  }

  // Fold away bit casts of the loaded value by loading the desired type.
  if (LI.hasOneUse())
    if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
      BC->replaceAllUsesWith(NewLoad);
      IC.EraseInstFromFunction(*BC);
      return &LI;
    }

  // FIXME: We should also canonicalize loads of vectors when their elements
  // are cast to other types.
  return nullptr;
}

// If we can determine that all possible objects pointed to by the provided
// pointer value are, not only dereferenceable, but also definitively less
// than or equal to the provided maximum size, then return true (constant
// global values and allocas are the typical objects whose size can be
// determined this way). Otherwise, return false.
502// 503// FIXME: This should probably live in ValueTracking (or similar). 504static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, 505 const DataLayout &DL) { 506 SmallPtrSet<Value *, 4> Visited; 507 SmallVector<Value *, 4> Worklist(1, V); 508 509 do { 510 Value *P = Worklist.pop_back_val(); 511 P = P->stripPointerCasts(); 512 513 if (!Visited.insert(P).second) 514 continue; 515 516 if (SelectInst *SI = dyn_cast<SelectInst>(P)) { 517 Worklist.push_back(SI->getTrueValue()); 518 Worklist.push_back(SI->getFalseValue()); 519 continue; 520 } 521 522 if (PHINode *PN = dyn_cast<PHINode>(P)) { 523 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 524 Worklist.push_back(PN->getIncomingValue(i)); 525 continue; 526 } 527 528 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) { 529 if (GA->mayBeOverridden()) 530 return false; 531 Worklist.push_back(GA->getAliasee()); 532 continue; 533 } 534 535 // If we know how big this object is, and it is less than MaxSize, continue 536 // searching. Otherwise, return false. 537 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) { 538 if (!AI->getAllocatedType()->isSized()) 539 return false; 540 541 ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize()); 542 if (!CS) 543 return false; 544 545 uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); 546 // Make sure that, even if the multiplication below would wrap as an 547 // uint64_t, we still do the right thing. 548 if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) 549 return false; 550 continue; 551 } 552 553 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { 554 if (!GV->hasDefinitiveInitializer() || !GV->isConstant()) 555 return false; 556 557 uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType()); 558 if (InitSize > MaxSize) 559 return false; 560 continue; 561 } 562 563 return false; 564 } while (!Worklist.empty()); 565 566 return true; 567} 568 569// If we're indexing into an object of a known size, and the outer index is 570// not a constant, but having any value but zero would lead to undefined 571// behavior, replace it with zero. 572// 573// For example, if we have: 574// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4 575// ... 576// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x 577// ... = load i32* %arrayidx, align 4 578// Then we know that we can replace %x in the GEP with i64 0. 579// 580// FIXME: We could fold any GEP index to zero that would cause UB if it were 581// not zero. Currently, we only handle the first such index. Also, we could 582// also search through non-zero constant indices if we kept track of the 583// offsets those indices implied. 584static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, 585 Instruction *MemI, unsigned &Idx) { 586 if (GEPI->getNumOperands() < 2) 587 return false; 588 589 // Find the first non-zero index of a GEP. If all indices are zero, return 590 // one past the last index. 591 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) { 592 unsigned I = 1; 593 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) { 594 Value *V = GEPI->getOperand(I); 595 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) 596 if (CI->isZero()) 597 continue; 598 599 break; 600 } 601 602 return I; 603 }; 604 605 // Skip through initial 'zero' indices, and find the corresponding pointer 606 // type. See if the next index is not a constant. 
  Idx = FirstNZIdx(GEPI);
  if (Idx == GEPI->getNumOperands())
    return false;
  if (isa<Constant>(GEPI->getOperand(Idx)))
    return false;

  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
  Type *AllocTy = GetElementPtrInst::getIndexedType(
      cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
          ->getElementType(),
      Ops);
  if (!AllocTy || !AllocTy->isSized())
    return false;
  const DataLayout &DL = IC.getDataLayout();
  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);

  // If there are more indices after the one we might replace with a zero, make
  // sure they're all non-negative. If any of them are negative, the overall
  // address being computed might be before the base address determined by the
  // first non-zero index.
  auto IsAllNonNegative = [&]() {
    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
      bool KnownNonNegative, KnownNegative;
      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
                        KnownNegative, 0, MemI);
      if (KnownNonNegative)
        continue;
      return false;
    }

    return true;
  };

  // FIXME: If the GEP is not inbounds, and there are extra indices after the
  // one we'll replace, those could cause the address computation to wrap
  // (rendering the IsAllNonNegative() check below insufficient). We can do
  // better, ignoring zero indices (and other indices we can prove small
  // enough not to wrap).
  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
    return false;

  // Note that isObjectSizeLessThanOrEq will return true only if the pointer
  // is also known to be dereferenceable.
  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
         IsAllNonNegative();
}

// If we're indexing into an object with a variable index for the memory
// access, but the object has only one element, we can assume that the index
// will always be zero. If we replace the GEP, return it.
template <typename T>
static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
                                          T &MemI) {
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
    unsigned Idx;
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
      Instruction *NewGEPI = GEPI->clone();
      NewGEPI->setOperand(Idx,
          ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
      NewGEPI->insertBefore(GEPI);
      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
      return NewGEPI;
    }
  }

  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Try to canonicalize the loaded type.
  if (Instruction *Res = combineLoadToOperationType(*this, LI))
    return Res;

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
  unsigned LoadAlign = LI.getAlignment();
  unsigned EffectiveLoadAlign =
      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());

  if (KnownAlign > EffectiveLoadAlign)
    LI.setAlignment(KnownAlign);
  else if (LoadAlign == 0)
    LI.setAlignment(EffectiveLoadAlign);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
    Worklist.Add(NewGEPI);
    return &LI;
  }

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(
        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
                                            LI.getName() + ".cast"));

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable. We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable. We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap! Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
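      // Both arms must be safe to load from unconditionally (checked below),
      // since the new loads are no longer guarded by the select's condition.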
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(2));
        return &LI;
      }

      // load (select (cond, P, null)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(1));
        return &LI;
      }
    }
  }
  return nullptr;
}

/// \brief Combine stores to match the type of value being stored.
///
/// The core idea here is that the memory does not have any intrinsic type
/// and, where we can, we should match the type of a store to the type of the
/// value being stored.
///
/// However, this routine must never change the width of a store or the number
/// of stores as that would introduce a semantic change. This combine is
/// expected to be a semantic no-op which just allows stores to more closely
/// model the types of their incoming values.
///
/// Currently, we also refuse to change the precise type used for an atomic or
/// volatile store. This is debatable, and might be reasonable to change
/// later. However, it is risky in case some backend or other part of LLVM is
/// relying on the exact type stored to select appropriate atomic operations.
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller
/// erase the store instruction as otherwise there is no way to signal whether
/// it was combined or not: IC.EraseInstFromFunction returns a null pointer.
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *V = SI.getValueOperand();

  // Fold away bit casts of the stored value by storing the original type.
  if (auto *BC = dyn_cast<BitCastInst>(V)) {
    V = BC->getOperand(0);
    combineStoreToNewValue(IC, SI, V);
    return true;
  }

  // FIXME: We should also canonicalize stores of vectors when their elements
  // are cast to other types.
  return false;
}

static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *V = SI.getValueOperand();
  Type *T = V->getType();

  if (!T->isAggregateType())
    return false;

  if (StructType *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack it.
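    // E.g. storing a "{ float }" value becomes an extractvalue of the single
    // member followed by a store of that float through a bitcast pointer.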
    if (ST->getNumElements() == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
  }

  return false;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return EraseInstFromFunction(SI);

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
  unsigned StoreAlign = SI.getAlignment();
  unsigned EffectiveStoreAlign =
      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());

  if (KnownAlign > EffectiveStoreAlign)
    SI.setAlignment(KnownAlign);
  else if (StoreAlign == 0)
    SI.setAlignment(EffectiveStoreAlign);

  // Try to canonicalize the stored type.
  if (unpackStoreToAggregate(*this, SI))
    return EraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
    Worklist.Add(NewGEPI);
    return &SI;
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen, and skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop. However, if the loaded value was
    // loaded from the pointer we're storing to and is the value being stored,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U); // Dropped a use.
    }
    return nullptr; // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr; // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2; }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges. If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
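  // OtherBB ends up as the predecessor of DestBB that is not StoreBB; if the
  // predecessors don't fit that two-edge shape, we bail out below.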
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if the relevant blocks aren't all distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case. There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, or isn't a store to the same location, or is not
    // the right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of
    // the destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
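  // getFirstInsertionPt skips any PHI nodes at the head of DestBB, including
  // the merge PHI that may have been created above.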
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}