InstCombineLoadStoreAlloca.cpp revision 05d62537276197b3351d9887f4967590b6a3b901
//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumDeadStore, "Number of dead stores eliminated");

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (TD) {
    const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      const Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Value *NullIdx =
        Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
                                                   New->getName()+".sub", It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, V);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
    // Note that we only do this for alloca's, because malloc should allocate
    // and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
  }

  return 0;
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
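/// For example, a sketch with hypothetical IR values (the source and
/// destination types must be integer, pointer, or vector types of the same
/// size, per the checks below):
///   %v = load i32* %c               ; %c = bitcast <4 x i8>* %p to i32*
/// becomes
///   %t = load <4 x i8>* %p
///   %v = bitcast <4 x i8> %t to i32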
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const TargetData *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  const PointerType *DestTy = cast<PointerType>(CI->getType());
  const Type *DestPTy = DestTy->getElementType();
  if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    const Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getTargetData() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
          IC.getTargetData()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
    if (KnownAlign >
        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
                                  LI.getAlignment()))
      LI.setAlignment(KnownAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile loads.
  if (LI.isVolatile()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
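    // A sketch of the pattern (hypothetical IR, address space 0):
    //   %p = getelementptr i32* null, i64 1
    //   %v = load i32* %p
    // Loading through the null-based GEP is undefined, so %v becomes undef
    // and a 'store undef, i32* null' is left behind for SimplifyCFG to turn
    // into unreachable.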
    if (isa<ConstantPointerNull>(GEPI0) &&
        GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
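/// For example, a sketch with hypothetical IR values (assuming 32-bit
/// pointers, so i32 and i8* have the same size):
///   store i32 %v, i32* %c           ; %c = bitcast i8** %p to i32*
/// becomes
///   %v.c = inttoptr i32 %v to i8*
///   store i8* %v.c, i8** %p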
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  const Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array or struct, the code below will not succeed.
  // Check to see if a trivial 'gep P, 0, 0' will help matters.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getTargetData() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
      IC.getTargetData()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before the store, cast
  // the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  const Type *CastSrcTy = SIOp0->getType();
  const Type *CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If CastOp is a pointer to an aggregate and this is a store to the first
  // field, emit a GEP to index into that first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
                                           NewGEPIndices.end());

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  return new StoreInst(NewCast, CastOp);
}
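
// For illustration, a sketch of the aggregate case handled above
// (hypothetical IR, assuming 32-bit pointers; value names are illustrative):
//   store i32 %x, i32* %c           ; %c = bitcast {i8*, float}* %P to i32*
// becomes
//   %c.sub = getelementptr inbounds {i8*, float}* %P, i32 0, i32 0
//   %x.c = inttoptr i32 %x to i8*
//   store i8* %x.c, i8** %c.sub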

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

// If this instruction has two uses, one of which is a llvm.dbg.declare,
// return the llvm.dbg.declare.
DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
  if (!V->hasNUses(2))
    return 0;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
      return DI;
    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
        return DI;
    }
  }
  return 0;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  // If the RHS is an alloca with two uses, the other one being a
  // llvm.dbg.declare, zapify the store and the declare, making the
  // alloca dead.  We must do this to prevent declares from affecting
  // codegen.
  if (!SI.isVolatile()) {
    if (Ptr->hasOneUse()) {
      if (isa<AllocaInst>(Ptr))
        return EraseInstFromFunction(SI);
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
        if (isa<AllocaInst>(GEP->getOperand(0))) {
          if (GEP->getOperand(0)->hasOneUse())
            return EraseInstFromFunction(SI);
          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
            EraseInstFromFunction(*DI);
            return EraseInstFromFunction(SI);
          }
        }
      }
    }
    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
      EraseInstFromFunction(*DI);
      return EraseInstFromFunction(SI);
    }
  }

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
    if (KnownAlign >
        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
                                  SI.getAlignment()))
      SI.setAlignment(KnownAlign);
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
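  //
  // A sketch of the pattern (hypothetical IR):
  //   store i32 %a, i32* %p
  //   %t = add i32 %x, %y        ; a few non-memory instructions in between
  //   store i32 %b, i32* %p      ; makes the first store dead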
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (!PrevSI->isVolatile() &&
          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the stored value is
    // the value loaded from this same pointer, then *this* store is dead
    // (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          !SI.isVolatile())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }


  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
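/// A sketch of the first case in IR terms (hypothetical block and value
/// names; "storemerge" matches the PHI name used below):
///   then:
///     store i32 %v1, i32* %P
///     br label %merge
///   else:
///     store i32 %v2, i32* %P
///     br label %merge
/// becomes a single store in the successor:
///   merge:
///     %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///     store i32 %storemerge, i32* %P
///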
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *OtherBB = 0;
  if (*PI != StoreBB)
    OtherBB = *PI;
  ++PI;
  if (PI == pred_end(DestBB))
    return false;

  if (*PI != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = *PI;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or if the
    // alignments differ, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        OtherStore->getAlignment() != SI.getAlignment())
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
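    // The CFG shape being matched, as a sketch:
    //
    //     OtherBB
    //     |     \
    //     |    StoreBB   (contains SI)
    //     |     /
    //     DestBB
    //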
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            OtherStore->getAlignment() != SI.getAlignment())
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
    PN->reserveOperandSpace(2);
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstNonPHI();
  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
                                    OtherStore->isVolatile(),
                                    SI.getAlignment()), *BBI);

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}