MemCpyOptimizer.cpp revision 5fdd6c8793462549e3593890ec61573da06e3346
1a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// 2a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 3a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// The LLVM Compiler Infrastructure 4a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 5a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// This file is distributed under the University of Illinois Open Source 6a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// License. See LICENSE.TXT for details. 7a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 8a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 9a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 10a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// This pass performs various transformations related to eliminating memcpy 11a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// calls, or transforming sets of stores into memset's. 12a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 13a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 14a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 15a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#define DEBUG_TYPE "memcpyopt" 16a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Transforms/Scalar.h" 17a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer#include "llvm/GlobalVariable.h" 18a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/IntrinsicInst.h" 19a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Instructions.h" 20a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/ADT/SmallVector.h" 21a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/ADT/Statistic.h" 22a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Analysis/Dominators.h" 23a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Analysis/AliasAnalysis.h" 24a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Analysis/MemoryDependenceAnalysis.h" 25bb89710dddf967199dfc56e8bf5d28b0003f2ee6Chris Lattner#include "llvm/Analysis/ValueTracking.h" 26b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner#include "llvm/Transforms/Utils/Local.h" 27a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Support/Debug.h" 28a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Support/GetElementPtrTypeIterator.h" 2961db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner#include "llvm/Support/IRBuilder.h" 30bdff548e4dd577a72094d57b282de4e765643b96Chris Lattner#include "llvm/Support/raw_ostream.h" 31a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Target/TargetData.h" 32149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner#include "llvm/Target/TargetLibraryInfo.h" 33a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include <list> 34a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonusing namespace llvm; 35a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 36a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonSTATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); 37a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonSTATISTIC(NumMemSetInfer, "Number of memsets inferred"); 3805cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan SandsSTATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); 39a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin KramerSTATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); 40a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 41a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstatic int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, 4267a716ab818301fe28068754c879e568c44e62f8Chris Lattner bool &VariableIdxFound, const TargetData &TD){ 43a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Skip over the first indices. 44a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson gep_type_iterator GTI = gep_type_begin(GEP); 45a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 1; i != Idx; ++i, ++GTI) 46a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /*skip along*/; 47a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 48a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Compute the offset implied by the rest of the indices. 49a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset = 0; 50a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 51a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); 52a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (OpC == 0) 53a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return VariableIdxFound = true; 54a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (OpC->isZero()) continue; // No offset. 55a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 56a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Handle struct indices, which add their field offset to the pointer. 57a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 58a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); 59a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson continue; 60a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 61a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 62a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Otherwise, we have a sequential type like an array or vector. Multiply 63a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the index by the ElementSize. 64777d2306b36816a53bc1ae1244c0dc7d998ae691Duncan Sands uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); 65a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset += Size*OpC->getSExtValue(); 66a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 67a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 68a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return Offset; 69a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 70a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 71a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a 72a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// constant offset, and return that constant offset. For example, Ptr1 might 73a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. 74a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstatic bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, 7567a716ab818301fe28068754c879e568c44e62f8Chris Lattner const TargetData &TD) { 762d5c0cd197454408531cd53e4dd65a431e07ba6fChris Lattner Ptr1 = Ptr1->stripPointerCasts(); 772d5c0cd197454408531cd53e4dd65a431e07ba6fChris Lattner Ptr2 = Ptr2->stripPointerCasts(); 789fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1); 799fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2); 809fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 819fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner bool VariableIdxFound = false; 829fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 839fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner // If one pointer is a GEP and the other isn't, then see if the GEP is a 849fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner // constant offset from the base, as in "P" and "gep P, 1". 859fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { 869fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); 879fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner return !VariableIdxFound; 889fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner } 899fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 909fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { 919fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); 929fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner return !VariableIdxFound; 939fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner } 949fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 95a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical 96a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // base. After that base, they may have some number of common (and 97a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // potentially variable) indices. After that they handle some constant 98a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // offset, which determines their offset from each other. At this point, we 99a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // handle no other case. 100a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) 101a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 102a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 103a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Skip any common indices and track the GEP types. 104a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Idx = 1; 105a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) 106a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) 107a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson break; 108a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 109a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); 110a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); 111a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (VariableIdxFound) return false; 112a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 113a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset = Offset2-Offset1; 114a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return true; 115a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 116a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 117a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 118a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. 119a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// This allows us to analyze stores like: 120a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+1 121a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+0 122a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+3 123a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+2 124a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// which sometimes happens with stores to arrays of structs etc. When we see 125a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// the first store, we make a range [1, 2). The second store extends the range 126a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the 127a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// two ranges into [0, 3) which is memset'able. 128a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 129a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstruct MemsetRange { 130a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Start/End - A semi range that describes the span that this range covers. 131a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The range is closed at the start and open at the end: [Start, End). 132a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Start, End; 133a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 134a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// StartPtr - The getelementptr instruction that points to the start of the 135a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// range. 136a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Value *StartPtr; 137a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 138a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// Alignment - The known alignment of the first store. 139a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Alignment; 140a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 141a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// TheStores - The actual stores that make up this range. 14206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner SmallVector<Instruction*, 16> TheStores; 143a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 144a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool isProfitableToUseMemset(const TargetData &TD) const; 145a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 146a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson}; 147a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} // end anon namespace 148a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 149a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonbool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { 150a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we found more than 8 stores to merge or 64 bytes, use memset. 151a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (TheStores.size() >= 8 || End-Start >= 64) return true; 15206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 15306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If there is nothing to merge, don't do anything. 15406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (TheStores.size() < 2) return false; 15506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 15606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If any of the stores are a memset, then it is always good to extend the 15706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // memset. 15806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner for (unsigned i = 0, e = TheStores.size(); i != e; ++i) 15906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (!isa<StoreInst>(TheStores[i])) 16006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner return true; 161a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 162a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Assume that the code generator is capable of merging pairs of stores 163a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // together if it wants to. 16406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (TheStores.size() == 2) return false; 165a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 166a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we have fewer than 8 stores, it can still be worthwhile to do this. 167a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // For example, merging 4 i8 stores into an i32 store is useful almost always. 168a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the 169a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memset will be split into 2 32-bit stores anyway) and doing so can 170a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // pessimize the llvm optimizer. 171a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 172a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since we don't have perfect knowledge here, make some assumptions: assume 173a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the maximum GPR width is the same size as the pointer size and assume that 174a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // this width can be stored. If so, check to see whether we will end up 175a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // actually reducing the number of stores used. 176a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Bytes = unsigned(End-Start); 177a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned NumPointerStores = Bytes/TD.getPointerSize(); 178a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 179a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Assume the remaining bytes if any are done a byte at a time. 180a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); 181a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 182a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we will reduce the # stores (according to this heuristic), do the 183a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 184a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // etc. 185a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return TheStores.size() > NumPointerStores+NumByteStores; 186a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 187a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 188a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 189a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 190a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonclass MemsetRanges { 191a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// Ranges - A sorted list of the memset ranges. We use std::list here 192a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// because each element is relatively large and expensive to copy. 193a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson std::list<MemsetRange> Ranges; 194a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson typedef std::list<MemsetRange>::iterator range_iterator; 19567a716ab818301fe28068754c879e568c44e62f8Chris Lattner const TargetData &TD; 196a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonpublic: 19767a716ab818301fe28068754c879e568c44e62f8Chris Lattner MemsetRanges(const TargetData &td) : TD(td) {} 198a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 199a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson typedef std::list<MemsetRange>::const_iterator const_iterator; 200a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const_iterator begin() const { return Ranges.begin(); } 201a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const_iterator end() const { return Ranges.end(); } 202a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool empty() const { return Ranges.empty(); } 203a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 20467a716ab818301fe28068754c879e568c44e62f8Chris Lattner void addInst(int64_t OffsetFromFirst, Instruction *Inst) { 20506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 20606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addStore(OffsetFromFirst, SI); 20706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner else 20806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); 20906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 21006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 21106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addStore(int64_t OffsetFromFirst, StoreInst *SI) { 21206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); 21306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 21406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addRange(OffsetFromFirst, StoreSize, 21506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner SI->getPointerOperand(), SI->getAlignment(), SI); 21606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 21706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 21806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { 21906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 22006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); 22167a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 22206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 22306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addRange(int64_t Start, int64_t Size, Value *Ptr, 22406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner unsigned Alignment, Instruction *Inst); 22506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 226a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson}; 227a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 228a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} // end anon namespace 229a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 230a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 23106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// addRange - Add a new store to the MemsetRanges data structure. This adds a 232a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// new range for the specified store at the specified offset, merging into 233a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// existing ranges as appropriate. 23406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// 23506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// Do a linear search of the ranges to see if this can be joined and/or to 23606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// find the insertion point in the list. We keep the ranges sorted for 23706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// simplicity here. This is a linear search of a linked list, which is ugly, 23806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// however the number of ranges is limited, so this won't get crazy slow. 23906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattnervoid MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, 24006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner unsigned Alignment, Instruction *Inst) { 24106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t End = Start+Size; 242a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson range_iterator I = Ranges.begin(), E = Ranges.end(); 243a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 244a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (I != E && Start > I->End) 245a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ++I; 246a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 247a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // We now know that I == E, in which case we didn't find anything to merge 248a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // with, or that Start <= I->End. If End < I->Start or I == E, then we need 249a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // to insert a new range. Handle this now. 250a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (I == E || End < I->Start) { 251a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson MemsetRange &R = *Ranges.insert(I, MemsetRange()); 252a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson R.Start = Start; 253a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson R.End = End; 25406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.StartPtr = Ptr; 25506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.Alignment = Alignment; 25606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.TheStores.push_back(Inst); 257a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return; 258a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 25906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 260a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // This store overlaps with I, add it. 26106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->TheStores.push_back(Inst); 262a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 263a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // At this point, we may have an interval that completely contains our store. 264a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If so, just add it to the interval and return. 265a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (I->Start <= Start && I->End >= End) 266a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return; 267a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 268a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now we know that Start <= I->End and End >= I->Start so the range overlaps 269a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // but is not entirely contained within the range. 270a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 271a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // See if the range extends the start of the range. In this case, it couldn't 272a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // possibly cause it to join the prior range, because otherwise we would have 273a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // stopped on *it*. 274a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Start < I->Start) { 275a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->Start = Start; 27606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->StartPtr = Ptr; 27706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->Alignment = Alignment; 278a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 279a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 280a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now we know that Start <= I->End and Start >= I->Start (so the startpoint 281a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // is in or right at the end of I), and that End >= I->Start. Extend I out to 282a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // End. 283a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (End > I->End) { 284a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->End = End; 2859c0f146d50ccc3ba780d4854b8e14422430013efNick Lewycky range_iterator NextI = I; 286a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (++NextI != E && End >= NextI->Start) { 287a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Merge the range in. 288a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); 289a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (NextI->End > I->End) 290a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->End = NextI->End; 291a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Ranges.erase(NextI); 292a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson NextI = I; 293a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 294a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 295a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 296a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 297a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 298a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// MemCpyOpt Pass 299a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 300a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 301a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 3023e8b6631e67e01e4960a7ba4668a50c596607473Chris Lattner class MemCpyOpt : public FunctionPass { 3032f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemoryDependenceAnalysis *MD; 304149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner TargetLibraryInfo *TLI; 30567a716ab818301fe28068754c879e568c44e62f8Chris Lattner const TargetData *TD; 306a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson public: 307a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson static char ID; // Pass identification, replacement for typeid 308081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson MemCpyOpt() : FunctionPass(ID) { 309081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); 3102f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD = 0; 311149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner TLI = 0; 312149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner TD = 0; 313081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson } 314a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 31567a716ab818301fe28068754c879e568c44e62f8Chris Lattner bool runOnFunction(Function &F); 31667a716ab818301fe28068754c879e568c44e62f8Chris Lattner 317a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson private: 318a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // This transformation requires dominator postdominator info 319a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson virtual void getAnalysisUsage(AnalysisUsage &AU) const { 320a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.setPreservesCFG(); 321a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addRequired<DominatorTree>(); 322a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addRequired<MemoryDependenceAnalysis>(); 323a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addRequired<AliasAnalysis>(); 324149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner AU.addRequired<TargetLibraryInfo>(); 325a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addPreserved<AliasAnalysis>(); 326a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addPreserved<MemoryDependenceAnalysis>(); 327a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 328a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 329a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Helper fuctions 33061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); 331d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); 33261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool processMemCpy(MemCpyInst *M); 333f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner bool processMemMove(MemMoveInst *M); 3346549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, 3356549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson uint64_t cpyLen, CallInst *C); 33643f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, 33743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner uint64_t MSize); 3382f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner bool processByValArgument(CallSite CS, unsigned ArgNo); 33967a716ab818301fe28068754c879e568c44e62f8Chris Lattner Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, 34067a716ab818301fe28068754c879e568c44e62f8Chris Lattner Value *ByteVal); 34167a716ab818301fe28068754c879e568c44e62f8Chris Lattner 342a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool iterateOnFunction(Function &F); 343a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson }; 344a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 345a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson char MemCpyOpt::ID = 0; 346a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 347a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 348a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// createMemCpyOptPass - The public interface to this file... 349a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonFunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } 350a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 3512ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", 3522ab36d350293c77fc8941ce1023e4899df7e3a82Owen Anderson false, false) 3532ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_DEPENDENCY(DominatorTree) 3542ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 355149f5283f93ec85b96888c284f56099a72cc2731Chris LattnerINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 3562ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 3572ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", 3582ab36d350293c77fc8941ce1023e4899df7e3a82Owen Anderson false, false) 359a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 36067a716ab818301fe28068754c879e568c44e62f8Chris Lattner/// tryMergingIntoMemset - When scanning forward over instructions, we look for 361a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// some other patterns to fold away. In particular, this looks for stores to 362ab4c366274a582dd8146b2820c6b999cad5fce36Duncan Sands/// neighboring locations of memory. If it sees enough consecutive ones, it 36367a716ab818301fe28068754c879e568c44e62f8Chris Lattner/// attempts to merge them together into a memcpy/memset. 36467a716ab818301fe28068754c879e568c44e62f8Chris LattnerInstruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, 36567a716ab818301fe28068754c879e568c44e62f8Chris Lattner Value *StartPtr, Value *ByteVal) { 36667a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (TD == 0) return 0; 3676549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson 368a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Okay, so we now have a single store that can be splatable. Scan to find 369a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // all subsequent stores of the same value to offset from the same pointer. 370a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Join these together into ranges, so we can decide whether contiguous blocks 371a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // are stored. 3728942f9bb9f8bfb0d113db6d4a1ae7203dcf4510aDan Gohman MemsetRanges Ranges(*TD); 373a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 37467a716ab818301fe28068754c879e568c44e62f8Chris Lattner BasicBlock::iterator BI = StartInst; 375a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (++BI; !isa<TerminatorInst>(BI); ++BI) { 37606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { 37706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If the instruction is readnone, ignore it, otherwise bail out. We 37806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // don't even allow readonly here because we don't want something like: 379a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 38006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) 38106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 38206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner continue; 38306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 38467a716ab818301fe28068754c879e568c44e62f8Chris Lattner 38506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { 38606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If this is a store, see if we can merge it in. 38706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (NextStore->isVolatile()) break; 38867a716ab818301fe28068754c879e568c44e62f8Chris Lattner 38906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this stored value is of the same byte-splattable value. 39006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) 39106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 39206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 39306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this store is to a constant offset from the start ptr. 39406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Offset; 395f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), 396f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner Offset, *TD)) 39706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 39806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 39906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner Ranges.addStore(Offset, NextStore); 40006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } else { 40106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner MemSetInst *MSI = cast<MemSetInst>(BI); 40206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 40306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (MSI->isVolatile() || ByteVal != MSI->getValue() || 40406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner !isa<ConstantInt>(MSI->getLength())) 40506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 40606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 40706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this store is to a constant offset from the start ptr. 40806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Offset; 40906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD)) 41006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 41106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 41206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner Ranges.addMemSet(Offset, MSI); 41306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 414a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 41567a716ab818301fe28068754c879e568c44e62f8Chris Lattner 416a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we have no ranges, then we just had a single store with nothing that 417a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // could be merged in. This is a very common case of course. 418a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Ranges.empty()) 41967a716ab818301fe28068754c879e568c44e62f8Chris Lattner return 0; 420a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 421a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we had at least one store that could be merged in, add the starting 422a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // store as well. We try to avoid this unless there is at least something 423a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // interesting as a small compile-time optimization. 42467a716ab818301fe28068754c879e568c44e62f8Chris Lattner Ranges.addInst(0, StartInst); 42567a716ab818301fe28068754c879e568c44e62f8Chris Lattner 42667a716ab818301fe28068754c879e568c44e62f8Chris Lattner // If we create any memsets, we put it right before the first instruction that 42767a716ab818301fe28068754c879e568c44e62f8Chris Lattner // isn't part of the memset block. This ensure that the memset is dominated 42867a716ab818301fe28068754c879e568c44e62f8Chris Lattner // by any addressing instruction needed by the start of the block. 42967a716ab818301fe28068754c879e568c44e62f8Chris Lattner IRBuilder<> Builder(BI); 43067a716ab818301fe28068754c879e568c44e62f8Chris Lattner 431a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now that we have full information about ranges, loop over the ranges and 432a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // emit memset's for anything big enough to be worthwhile. 43367a716ab818301fe28068754c879e568c44e62f8Chris Lattner Instruction *AMemSet = 0; 434a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 435a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I != E; ++I) { 436a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const MemsetRange &Range = *I; 43767a716ab818301fe28068754c879e568c44e62f8Chris Lattner 438a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Range.TheStores.size() == 1) continue; 439a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 440a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If it is profitable to lower this range to memset, do so now. 4418942f9bb9f8bfb0d113db6d4a1ae7203dcf4510aDan Gohman if (!Range.isProfitableToUseMemset(*TD)) 442a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson continue; 443a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 44467a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Otherwise, we do want to transform this! Create a new memset. 445a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Get the starting pointer of the block. 446a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson StartPtr = Range.StartPtr; 44767a716ab818301fe28068754c879e568c44e62f8Chris Lattner 44820adc9dc4650313f017b27d9818eb2176238113dMon P Wang // Determine alignment 44920adc9dc4650313f017b27d9818eb2176238113dMon P Wang unsigned Alignment = Range.Alignment; 45020adc9dc4650313f017b27d9818eb2176238113dMon P Wang if (Alignment == 0) { 45120adc9dc4650313f017b27d9818eb2176238113dMon P Wang const Type *EltType = 45267a716ab818301fe28068754c879e568c44e62f8Chris Lattner cast<PointerType>(StartPtr->getType())->getElementType(); 45320adc9dc4650313f017b27d9818eb2176238113dMon P Wang Alignment = TD->getABITypeAlignment(EltType); 45420adc9dc4650313f017b27d9818eb2176238113dMon P Wang } 45567a716ab818301fe28068754c879e568c44e62f8Chris Lattner 45667a716ab818301fe28068754c879e568c44e62f8Chris Lattner AMemSet = 45761db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); 45861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner 459cb33fd17cce475a1d47b2695e311b6934ad0ef86David Greene DEBUG(dbgs() << "Replace stores:\n"; 460a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) 4612f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner dbgs() << *Range.TheStores[i] << '\n'; 46267a716ab818301fe28068754c879e568c44e62f8Chris Lattner dbgs() << "With: " << *AMemSet << '\n'); 463b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel 464b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel if (!Range.TheStores.empty()) 465b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); 466b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel 467a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Zap all the stores. 46806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner for (SmallVector<Instruction*, 16>::const_iterator 469ff1e98c72ae5f2aa805112925fd5c06049aa8e79Chris Lattner SI = Range.TheStores.begin(), 4708a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner SE = Range.TheStores.end(); SI != SE; ++SI) { 4718a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner MD->removeInstruction(*SI); 472a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson (*SI)->eraseFromParent(); 4738a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner } 474a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ++NumMemSetInfer; 475a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 476a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 47767a716ab818301fe28068754c879e568c44e62f8Chris Lattner return AMemSet; 47867a716ab818301fe28068754c879e568c44e62f8Chris Lattner} 47967a716ab818301fe28068754c879e568c44e62f8Chris Lattner 48067a716ab818301fe28068754c879e568c44e62f8Chris Lattner 48167a716ab818301fe28068754c879e568c44e62f8Chris Lattnerbool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { 48267a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (SI->isVolatile()) return false; 48367a716ab818301fe28068754c879e568c44e62f8Chris Lattner 48467a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (TD == 0) return false; 48567a716ab818301fe28068754c879e568c44e62f8Chris Lattner 48667a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Detect cases where we're performing call slot forwarding, but 48767a716ab818301fe28068754c879e568c44e62f8Chris Lattner // happen to be using a load-store pair to implement it, rather than 48867a716ab818301fe28068754c879e568c44e62f8Chris Lattner // a memcpy. 48967a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { 4905d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman if (!LI->isVolatile() && LI->hasOneUse() && 4915d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman LI->getParent() == SI->getParent()) { 49270d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman MemDepResult ldep = MD->getDependency(LI); 49367a716ab818301fe28068754c879e568c44e62f8Chris Lattner CallInst *C = 0; 49470d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) 49570d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman C = dyn_cast<CallInst>(ldep.getInst()); 49670d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman 49770d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman if (C) { 49870d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman // Check that nothing touches the dest of the "copy" between 49970d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman // the call and the store. 5005d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 5015d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman AliasAnalysis::Location StoreLoc = AA.getLocation(SI); 5025d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman for (BasicBlock::iterator I = --BasicBlock::iterator(SI), 5035d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman E = C; I != E; --I) { 5045d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { 50570d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman C = 0; 5065d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman break; 5075d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman } 50870d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman } 50970d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman } 51070d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman 51167a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (C) { 51267a716ab818301fe28068754c879e568c44e62f8Chris Lattner bool changed = performCallSlotOptzn(LI, 51367a716ab818301fe28068754c879e568c44e62f8Chris Lattner SI->getPointerOperand()->stripPointerCasts(), 51467a716ab818301fe28068754c879e568c44e62f8Chris Lattner LI->getPointerOperand()->stripPointerCasts(), 51567a716ab818301fe28068754c879e568c44e62f8Chris Lattner TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); 51667a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (changed) { 51767a716ab818301fe28068754c879e568c44e62f8Chris Lattner MD->removeInstruction(SI); 51867a716ab818301fe28068754c879e568c44e62f8Chris Lattner SI->eraseFromParent(); 519f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner MD->removeInstruction(LI); 52067a716ab818301fe28068754c879e568c44e62f8Chris Lattner LI->eraseFromParent(); 52167a716ab818301fe28068754c879e568c44e62f8Chris Lattner ++NumMemCpyInstr; 52267a716ab818301fe28068754c879e568c44e62f8Chris Lattner return true; 52367a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 52467a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 52567a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 52667a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 52767a716ab818301fe28068754c879e568c44e62f8Chris Lattner 52867a716ab818301fe28068754c879e568c44e62f8Chris Lattner // There are two cases that are interesting for this code to handle: memcpy 52967a716ab818301fe28068754c879e568c44e62f8Chris Lattner // and memset. Right now we only handle memset. 53067a716ab818301fe28068754c879e568c44e62f8Chris Lattner 53167a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Ensure that the value being stored is something that can be memset'able a 53267a716ab818301fe28068754c879e568c44e62f8Chris Lattner // byte at a time like "0" or "-1" or any width, as well as things like 53367a716ab818301fe28068754c879e568c44e62f8Chris Lattner // 0xA0A0A0A0 and 0.0. 53467a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (Value *ByteVal = isBytewiseValue(SI->getOperand(0))) 53567a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), 53667a716ab818301fe28068754c879e568c44e62f8Chris Lattner ByteVal)) { 53767a716ab818301fe28068754c879e568c44e62f8Chris Lattner BBI = I; // Don't invalidate iterator. 53867a716ab818301fe28068754c879e568c44e62f8Chris Lattner return true; 53967a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 54067a716ab818301fe28068754c879e568c44e62f8Chris Lattner 54167a716ab818301fe28068754c879e568c44e62f8Chris Lattner return false; 542a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 543a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 544d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattnerbool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { 545d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner // See if there is another memset or store neighboring this memset which 546d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner // allows us to widen out the memset to do a single larger store. 5470468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) 5480468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), 5490468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner MSI->getValue())) { 5500468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner BBI = I; // Don't invalidate iterator. 5510468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner return true; 5520468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner } 553d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner return false; 554d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner} 555d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner 556a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 557a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// performCallSlotOptzn - takes a memcpy and a call that it depends on, 558a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// and checks for the possibility of a call slot optimization by having 559a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// the call write its result directly into the destination of the memcpy. 5606549121c660dfd18361cd3daf6c766bee80d3097Owen Andersonbool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, 5616549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson Value *cpyDest, Value *cpySrc, 5626549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson uint64_t cpyLen, CallInst *C) { 563a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The general transformation to keep in mind is 564a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 565a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // call @func(..., src, ...) 566a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy(dest, src, ...) 567a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 568a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // -> 569a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 570a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy(dest, src, ...) 571a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // call @func(..., dest, ...) 572a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 573a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since moving the memcpy is technically awkward, we additionally check that 574a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // src only holds uninitialized values at the moment of the call, meaning that 575a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the memcpy can be discarded rather than moved. 576a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 577a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Deliberately get the source and destination with bitcasts stripped away, 578a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // because we'll need to do type comparisons based on the underlying type. 5797d3056b16038a6a09c452c0dfcc3c8f4e421506aGabor Greif CallSite CS(C); 580a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 581a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Require that src be an alloca. This simplifies the reasoning considerably. 58261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); 583a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!srcAlloca) 584a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 585a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 586a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that all of src is copied to dest. 58767a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (TD == 0) return false; 588a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 58961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); 590a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!srcArraySize) 591a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 592a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 5938942f9bb9f8bfb0d113db6d4a1ae7203dcf4510aDan Gohman uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * 594a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson srcArraySize->getZExtValue(); 595a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 5966549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson if (cpyLen < srcSize) 597a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 598a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 599a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that accessing the first srcSize bytes of dest will not cause a 600a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // trap. Otherwise the transform is invalid since it might cause a trap 601a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // to occur earlier than it otherwise would. 60261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { 603a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The destination is an alloca. Check it is larger than srcSize. 60461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); 605a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!destArraySize) 606a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 607a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 6088942f9bb9f8bfb0d113db6d4a1ae7203dcf4510aDan Gohman uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * 609a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson destArraySize->getZExtValue(); 610a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 611a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (destSize < srcSize) 612a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 61361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { 614a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If the destination is an sret parameter then only accesses that are 615a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // outside of the returned struct type can trap. 616a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!A->hasStructRetAttr()) 617a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 618a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 61961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); 6208942f9bb9f8bfb0d113db6d4a1ae7203dcf4510aDan Gohman uint64_t destSize = TD->getTypeAllocSize(StructTy); 621a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 622a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (destSize < srcSize) 623a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 624a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } else { 625a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 626a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 627a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 628a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that src is not accessed except via the call and the memcpy. This 629a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // guarantees that it holds only undefined values when passed in (so the final 630a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy can be dropped), that it is not read or written between the call and 631a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the memcpy, and that writing beyond the end of it is undefined. 632a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), 633a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson srcAlloca->use_end()); 634a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (!srcUseList.empty()) { 635321a813c536e2f1f2f05bbe78a7fbf64046f0557Dan Gohman User *UI = srcUseList.pop_back_val(); 636a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 637009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson if (isa<BitCastInst>(UI)) { 638a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); 639a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I != E; ++I) 640a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson srcUseList.push_back(*I); 64161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { 642009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson if (G->hasAllZeroIndices()) 643009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); 644009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson I != E; ++I) 645009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson srcUseList.push_back(*I); 646009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson else 647009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson return false; 648a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } else if (UI != C && UI != cpy) { 649a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 650a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 651a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 652a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 653a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since we're changing the parameter to the callsite, we need to make sure 654a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // that what would be the new parameter dominates the callsite. 65561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner DominatorTree &DT = getAnalysis<DominatorTree>(); 65661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) 657a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!DT.dominates(cpyDestInst, C)) 658a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 659a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 660a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // In addition to knowing that the call does not access src in some 661a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // unexpected manner, for example via a global, which we deduce from 662a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the use analysis, we also need to know that it does not sneakily 663a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // access dest. We rely on AA to figure this out for us. 66461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 66506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef) 666a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 667a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 668a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // All the checks have passed, so do the transformation. 66912cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson bool changedArgument = false; 670a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 0; i < CS.arg_size(); ++i) 671009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { 672a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (cpySrc->getType() != cpyDest->getType()) 6737cbd8a3e92221437048b484d5ef9c0a22d0f8c58Gabor Greif cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), 674a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson cpyDest->getName(), C); 67512cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson changedArgument = true; 67661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (CS.getArgument(i)->getType() == cpyDest->getType()) 677009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson CS.setArgument(i, cpyDest); 67861c6ba85715fdcb66f746678879984151f1e5485Chris Lattner else 67961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, 68061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner CS.getArgument(i)->getType(), cpyDest->getName(), C)); 681a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 682a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 68312cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson if (!changedArgument) 68412cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson return false; 68512cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson 686a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Drop any cached information about the call, because we may have changed 687a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // its dependence information by changing its parameter. 6882f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(C); 689a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 6902f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Remove the memcpy. 6912f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(cpy); 692fe60104ac97f3a8736dcfbfdf9547c7b7cc7b951Dan Gohman ++NumMemCpyInstr; 693a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 694a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return true; 695a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 696a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 69743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// processMemCpyMemCpyDependence - We've found that the (upward scanning) 69843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to 69943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// copy from MDep's input if we can. MSize is the size of M's copy. 70043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// 70143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattnerbool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, 70243f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner uint64_t MSize) { 703a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // We can only transforms memcpy's where the dest of one is the source of the 70443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // other. 7052f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (M->getSource() != MDep->getDest() || MDep->isVolatile()) 706a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 707a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 708f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // If dep instruction is reading from our current input, then it is a noop 709f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // transfer and substituting the input won't change this instruction. Just 710f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // ignore the input and let someone else zap MDep. This handles cases like: 711f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // memcpy(a <- a) 712f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // memcpy(b <- a) 713f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner if (M->getSource() == MDep->getSource()) 714f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner return false; 715f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner 7167a2bdde0a0eebcd2125055e0eacaca040f0b766cChris Lattner // Second, the length of the memcpy's must be the same, or the preceding one 717a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // must be larger than the following one. 7188fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); 7198fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); 7208fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) 7218fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman return false; 722a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 7232f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 724604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner 725604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Verify that the copied-from memory doesn't change in between the two 726604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // transfers. For example, in: 727604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // memcpy(a <- b) 728604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // *b = 42; 729604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // memcpy(c <- a) 730604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // It would be invalid to transform the second memcpy into memcpy(c <- b). 731604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 732604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // TODO: If the code between M and MDep is transparent to the destination "c", 733604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // then we could still perform the xform by moving M up to the first memcpy. 734604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 735604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // NOTE: This is conservative, it will stop on any read from the source loc, 736604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // not just the defining memcpy. 737604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MemDepResult SourceDep = 738604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), 739604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner false, M, M->getParent()); 740604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 741604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner return false; 7425a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner 7435a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // If the dest of the second might alias the source of the first, then the 7445a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // source and dest might overlap. We still want to eliminate the intermediate 7455a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // value, but we have to generate a memmove instead of memcpy. 74661db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner bool UseMemMove = false; 74761db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) 74861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner UseMemMove = true; 749a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 7502f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // If all checks passed, then we can transform M. 75143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 75204fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // Make sure to use the lesser of the alignment of the source and the dest 75304fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // since we're changing where we're reading from, but don't want to increase 75404fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // the alignment past what can be read from or written to. 755c69a00047013a0e2e07ae44c38e013a7d905b10eEric Christopher // TODO: Is this worth it if we're creating a less aligned memcpy? For 756c69a00047013a0e2e07ae44c38e013a7d905b10eEric Christopher // example we could be moving from movaps -> movq on x86. 757d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); 75861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner 75961db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner IRBuilder<> Builder(M); 76061db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (UseMemMove) 76161db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), 76261db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Align, M->isVolatile()); 76361db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner else 76461db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), 76561db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Align, M->isVolatile()); 766d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner 767604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Remove the instruction we're replacing. 7682f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(M); 769d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner M->eraseFromParent(); 770d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner ++NumMemCpyInstr; 771d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner return true; 772a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 773a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 77443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 77543f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// processMemCpy - perform simplification of memcpy's. If we have memcpy A 77643f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite 77743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// B to be a memcpy from X to Z (or potentially a memmove, depending on 77843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// circumstances). This allows later passes to remove the first memcpy 77943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// altogether. 78043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattnerbool MemCpyOpt::processMemCpy(MemCpyInst *M) { 7812f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // We can only optimize statically-sized memcpy's that are non-volatile. 7822f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); 7832f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (CopySize == 0 || M->isVolatile()) return false; 78443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 7858fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner // If the source and destination of the memcpy are the same, then zap it. 7868fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner if (M->getSource() == M->getDest()) { 7878fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner MD->removeInstruction(M); 7888fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner M->eraseFromParent(); 7898fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner return false; 7908fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner } 791a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer 792a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer // If copying from a constant, try to turn the memcpy into a memset. 79349c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) 7943fed0d917d23f4151cb8d98f122edd3229daf6abBenjamin Kramer if (GV->isConstant() && GV->hasDefinitiveInitializer()) 79549c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { 79661db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner IRBuilder<> Builder(M); 79761db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize, 79861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner M->getAlignment(), false); 79949c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer MD->removeInstruction(M); 80049c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer M->eraseFromParent(); 80149c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer ++NumCpyToSet; 80249c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer return true; 80349c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer } 804a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer 80543f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // The are two possible optimizations we can do for memcpy: 80643f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // a) memcpy-memcpy xform which exposes redundance for DSE. 80743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // b) call-memcpy xform for return slot optimization. 8082f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemDepResult DepInfo = MD->getDependency(M); 8092f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (!DepInfo.isClobber()) 81043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner return false; 81143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 8122f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst())) 8132f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); 81443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 8152f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { 8168fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner if (performCallSlotOptzn(M, M->getDest(), M->getSource(), 8178fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner CopySize->getZExtValue(), C)) { 818f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner MD->removeInstruction(M); 8198fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner M->eraseFromParent(); 8208fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner return true; 8218fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner } 82243f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner } 823d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner 82443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner return false; 82543f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner} 82643f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 827f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst 828f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner/// are guaranteed not to alias. 829f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattnerbool MemCpyOpt::processMemMove(MemMoveInst *M) { 830f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 831f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 832149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner if (!TLI->has(LibFunc::memmove)) 833149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner return false; 834149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner 835f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // See if the pointers alias. 83661db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) 837f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner return false; 838f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 839cb33fd17cce475a1d47b2695e311b6934ad0ef86David Greene DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); 840f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 841f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // If not, then we know we can transform this. 842f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner Module *Mod = M->getParent()->getParent()->getParent(); 8435fdd6c8793462549e3593890ec61573da06e3346Jay Foad Type *ArgTys[3] = { M->getRawDest()->getType(), 8445fdd6c8793462549e3593890ec61573da06e3346Jay Foad M->getRawSource()->getType(), 8455fdd6c8793462549e3593890ec61573da06e3346Jay Foad M->getLength()->getType() }; 846a399781289092fcdceb58b21174229f4373c4191Gabor Greif M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, 847a399781289092fcdceb58b21174229f4373c4191Gabor Greif ArgTys, 3)); 84805cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands 849f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // MemDep may have over conservative information about this instruction, just 850f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // conservatively flush it from the cache. 8512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(M); 85205cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands 85305cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands ++NumMoveToCpy; 854f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner return true; 855f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner} 856f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 8572f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner/// processByValArgument - This is called on every byval argument in call sites. 8582f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattnerbool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { 85967a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (TD == 0) return false; 860f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 861604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Find out what feeds this byval argument. 8622f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner Value *ByValArg = CS.getArgument(ArgNo); 863b5a3196f809e8edb2e9fef09de1de3d382cb852fChris Lattner const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType(); 864b5a3196f809e8edb2e9fef09de1de3d382cb852fChris Lattner uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); 865604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MemDepResult DepInfo = 866604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), 867604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner true, CS.getInstruction(), 868604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner CS.getInstruction()->getParent()); 8692f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (!DepInfo.isClobber()) 8702f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 8712f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 8722f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by 8732f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // a memcpy, see if we can byval from the source of the memcpy instead of the 8742f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // result. 8752f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); 8762f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (MDep == 0 || MDep->isVolatile() || 8772f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ByValArg->stripPointerCasts() != MDep->getDest()) 8782f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 8792f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 8802f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // The length of the memcpy must be larger or equal to the size of the byval. 8812f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); 882604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) 8832f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 8842f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 885b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // Get the alignment of the byval. If the call doesn't specify the alignment, 886b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // then it is some target specific value that we can't know. 8872f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); 888b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner if (ByValAlign == 0) return false; 889b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner 890b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // If it is greater than the memcpy, then we check to see if we can force the 891b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // source of the memcpy to the alignment we need. If we fail, we bail out. 892b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner if (MDep->getAlignment() < ByValAlign && 893b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) 894b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner return false; 8952f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 8962f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Verify that the copied-from memory doesn't change in between the memcpy and 8972f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // the byval call. 8982f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // memcpy(a <- b) 8992f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // *b = 42; 9002f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // foo(*a) 9012f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // It would be invalid to transform the second memcpy into foo(*b). 902604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 903604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // NOTE: This is conservative, it will stop on any read from the source loc, 904604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // not just the defining memcpy. 905604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MemDepResult SourceDep = 906604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), 907604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner false, CS.getInstruction(), MDep->getParent()); 908604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 909604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner return false; 910604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner 9112f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner Value *TmpCast = MDep->getSource(); 9122f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (MDep->getSource()->getType() != ByValArg->getType()) 9132f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), 9142f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner "tmpcast", CS.getInstruction()); 915604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner 9162f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" 9172f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner << " " << *MDep << "\n" 9182f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner << " " << *CS.getInstruction() << "\n"); 9192f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 9202f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Otherwise we're good! Update the byval argument. 9212f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner CS.setArgument(ArgNo, TmpCast); 9222f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ++NumMemCpyInstr; 9232f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return true; 9242f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner} 9252f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 9262f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner/// iterateOnFunction - Executes one iteration of MemCpyOpt. 927a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonbool MemCpyOpt::iterateOnFunction(Function &F) { 92861c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool MadeChange = false; 929a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 93061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner // Walk all instruction in the function. 931a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { 9322f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { 93361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner // Avoid invalidating the iterator. 93461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner Instruction *I = BI++; 935a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 9362f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner bool RepeatInstruction = false; 9372f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 938a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson if (StoreInst *SI = dyn_cast<StoreInst>(I)) 93961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner MadeChange |= processStore(SI, BI); 940d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) 941d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner RepeatInstruction = processMemSet(M, BI); 942d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) 9432f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner RepeatInstruction = processMemCpy(M); 944d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) 9452f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner RepeatInstruction = processMemMove(M); 946d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (CallSite CS = (Value*)I) { 9472f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 9482f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (CS.paramHasAttr(i+1, Attribute::ByVal)) 9492f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MadeChange |= processByValArgument(CS, i); 9502f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner } 9512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 9522f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Reprocess the instruction if desired. 9532f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (RepeatInstruction) { 9548a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner if (BI != BB->begin()) --BI; 9552f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MadeChange = true; 956f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner } 957a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 958a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 959a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 96061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner return MadeChange; 961a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 96261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner 96361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner// MemCpyOpt::runOnFunction - This is the main transformation entry point for a 96461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner// function. 96561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner// 96661c6ba85715fdcb66f746678879984151f1e5485Chris Lattnerbool MemCpyOpt::runOnFunction(Function &F) { 96761c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool MadeChange = false; 9682f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD = &getAnalysis<MemoryDependenceAnalysis>(); 96967a716ab818301fe28068754c879e568c44e62f8Chris Lattner TD = getAnalysisIfAvailable<TargetData>(); 970149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner TLI = &getAnalysis<TargetLibraryInfo>(); 971149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner 972149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // If we don't have at least memset and memcpy, there is little point of doing 973149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // anything here. These are required by a freestanding implementation, so if 974149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // even they are disabled, there is no point in trying hard. 975149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) 976149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner return false; 977149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner 97861c6ba85715fdcb66f746678879984151f1e5485Chris Lattner while (1) { 97961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (!iterateOnFunction(F)) 98061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner break; 98161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner MadeChange = true; 98261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } 98361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner 9842f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD = 0; 98561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner return MadeChange; 98661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner} 987