MemCpyOptimizer.cpp revision 37ed9c199ca639565f6ce88105f9e39e898d82d0
1a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// 2a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 3a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// The LLVM Compiler Infrastructure 4a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 5a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// This file is distributed under the University of Illinois Open Source 6a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// License. See LICENSE.TXT for details. 7a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 8a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 9a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 10a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// This pass performs various transformations related to eliminating memcpy 11a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// calls, or transforming sets of stores into memset's. 12a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 13a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 14a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 15a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Transforms/Scalar.h" 16a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/ADT/SmallVector.h" 17a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/ADT/Statistic.h" 18a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Analysis/AliasAnalysis.h" 1937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Analysis/AssumptionTracker.h" 20a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Analysis/MemoryDependenceAnalysis.h" 21bb89710dddf967199dfc56e8bf5d28b0003f2ee6Chris Lattner#include "llvm/Analysis/ValueTracking.h" 220b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 2336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h" 2436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/GetElementPtrTypeIterator.h" 250b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/GlobalVariable.h" 260b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IRBuilder.h" 270b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Instructions.h" 280b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h" 29a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Support/Debug.h" 30bdff548e4dd577a72094d57b282de4e765643b96Chris Lattner#include "llvm/Support/raw_ostream.h" 31149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner#include "llvm/Target/TargetLibraryInfo.h" 3206cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Transforms/Utils/Local.h" 33a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include <list> 34a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonusing namespace llvm; 35a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 36dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "memcpyopt" 37dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 38a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonSTATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); 39a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonSTATISTIC(NumMemSetInfer, "Number of memsets inferred"); 4005cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan SandsSTATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); 41a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin KramerSTATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); 42a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 4339acdb0200ff78065699509fccfc605f86237350Benjamin Kramerstatic int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, 443574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow bool &VariableIdxFound, const DataLayout &TD){ 45a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Skip over the first indices. 46a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson gep_type_iterator GTI = gep_type_begin(GEP); 47a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 1; i != Idx; ++i, ++GTI) 48a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /*skip along*/; 49a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 50a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Compute the offset implied by the rest of the indices. 51a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset = 0; 52a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 53a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); 54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!OpC) 55a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return VariableIdxFound = true; 56a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (OpC->isZero()) continue; // No offset. 57a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 58a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Handle struct indices, which add their field offset to the pointer. 59db125cfaf57cc83e7dd7453de2d509bc8efd0e5eChris Lattner if (StructType *STy = dyn_cast<StructType>(*GTI)) { 60a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); 61a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson continue; 62a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 63a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 64a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Otherwise, we have a sequential type like an array or vector. Multiply 65a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the index by the ElementSize. 66777d2306b36816a53bc1ae1244c0dc7d998ae691Duncan Sands uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); 67a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset += Size*OpC->getSExtValue(); 68a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 69a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 70a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return Offset; 71a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 72a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 73a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a 74a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// constant offset, and return that constant offset. For example, Ptr1 might 75a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. 76a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstatic bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, 773574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow const DataLayout &TD) { 782d5c0cd197454408531cd53e4dd65a431e07ba6fChris Lattner Ptr1 = Ptr1->stripPointerCasts(); 792d5c0cd197454408531cd53e4dd65a431e07ba6fChris Lattner Ptr2 = Ptr2->stripPointerCasts(); 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Handle the trivial case first. 8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Ptr1 == Ptr2) { 8336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Offset = 0; 8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8739acdb0200ff78065699509fccfc605f86237350Benjamin Kramer GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1); 8839acdb0200ff78065699509fccfc605f86237350Benjamin Kramer GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2); 89a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 909fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner bool VariableIdxFound = false; 919fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 929fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner // If one pointer is a GEP and the other isn't, then see if the GEP is a 939fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner // constant offset from the base, as in "P" and "gep P, 1". 94dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { 959fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); 969fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner return !VariableIdxFound; 979fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner } 989fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 99dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { 1009fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); 1019fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner return !VariableIdxFound; 1029fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner } 103a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 104a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical 105a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // base. After that base, they may have some number of common (and 106a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // potentially variable) indices. After that they handle some constant 107a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // offset, which determines their offset from each other. At this point, we 108a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // handle no other case. 109a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) 110a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 111a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 112a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Skip any common indices and track the GEP types. 113a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Idx = 1; 114a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) 115a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) 116a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson break; 117a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 118a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); 119a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); 120a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (VariableIdxFound) return false; 121a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 122a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset = Offset2-Offset1; 123a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return true; 124a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 125a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 126a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 127a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. 128a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// This allows us to analyze stores like: 129a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+1 130a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+0 131a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+3 132a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+2 133a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// which sometimes happens with stores to arrays of structs etc. When we see 134a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// the first store, we make a range [1, 2). The second store extends the range 135a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the 136a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// two ranges into [0, 3) which is memset'able. 137a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 138a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstruct MemsetRange { 139a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Start/End - A semi range that describes the span that this range covers. 140a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem // The range is closed at the start and open at the end: [Start, End). 141a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Start, End; 142a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 143a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// StartPtr - The getelementptr instruction that points to the start of the 144a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// range. 145a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Value *StartPtr; 146a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 147a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// Alignment - The known alignment of the first store. 148a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Alignment; 149a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 150a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// TheStores - The actual stores that make up this range. 15106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner SmallVector<Instruction*, 16> TheStores; 152a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1533574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow bool isProfitableToUseMemset(const DataLayout &TD) const; 154a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 155a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson}; 156a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} // end anon namespace 157a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 1583574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmowbool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { 159a4b6fd5be08ad6a1cb2d07aba2814aaa2a626c8dChad Rosier // If we found more than 4 stores to merge or 16 bytes, use memset. 160d8bd26ee240bbc10a230d2ea8f8975d1fd32ba7cChad Rosier if (TheStores.size() >= 4 || End-Start >= 16) return true; 16106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 16206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If there is nothing to merge, don't do anything. 16306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (TheStores.size() < 2) return false; 164a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 16506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If any of the stores are a memset, then it is always good to extend the 16606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // memset. 16706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner for (unsigned i = 0, e = TheStores.size(); i != e; ++i) 16806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (!isa<StoreInst>(TheStores[i])) 16906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner return true; 170a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 171a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Assume that the code generator is capable of merging pairs of stores 172a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // together if it wants to. 17306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (TheStores.size() == 2) return false; 174a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 175a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we have fewer than 8 stores, it can still be worthwhile to do this. 176a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // For example, merging 4 i8 stores into an i32 store is useful almost always. 177a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the 178a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memset will be split into 2 32-bit stores anyway) and doing so can 179a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // pessimize the llvm optimizer. 180a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 181a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since we don't have perfect knowledge here, make some assumptions: assume 1824b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault // the maximum GPR width is the same size as the largest legal integer 1834b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault // size. If so, check to see whether we will end up actually reducing the 1844b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault // number of stores used. 185a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Bytes = unsigned(End-Start); 1864b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault unsigned MaxIntSize = TD.getLargestLegalIntTypeSize(); 1874b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault if (MaxIntSize == 0) 1884b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault MaxIntSize = 1; 1894b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault unsigned NumPointerStores = Bytes / MaxIntSize; 190a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 191a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Assume the remaining bytes if any are done a byte at a time. 1924b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; 193a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 194a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we will reduce the # stores (according to this heuristic), do the 195a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 196a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // etc. 197a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return TheStores.size() > NumPointerStores+NumByteStores; 198a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem} 199a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 200a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 201a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 202a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonclass MemsetRanges { 203a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// Ranges - A sorted list of the memset ranges. We use std::list here 204a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// because each element is relatively large and expensive to copy. 205a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson std::list<MemsetRange> Ranges; 206a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson typedef std::list<MemsetRange>::iterator range_iterator; 20736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const DataLayout &DL; 208a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonpublic: 20936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MemsetRanges(const DataLayout &DL) : DL(DL) {} 210a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 211a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson typedef std::list<MemsetRange>::const_iterator const_iterator; 212a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const_iterator begin() const { return Ranges.begin(); } 213a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const_iterator end() const { return Ranges.end(); } 214a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool empty() const { return Ranges.empty(); } 215a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 21667a716ab818301fe28068754c879e568c44e62f8Chris Lattner void addInst(int64_t OffsetFromFirst, Instruction *Inst) { 21706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 21806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addStore(OffsetFromFirst, SI); 21906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner else 22006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); 22106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 22206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 22306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addStore(int64_t OffsetFromFirst, StoreInst *SI) { 22436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); 225a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 22606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addRange(OffsetFromFirst, StoreSize, 22706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner SI->getPointerOperand(), SI->getAlignment(), SI); 22806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 229a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 23006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { 23106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 23206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); 23367a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 234a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 23506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addRange(int64_t Start, int64_t Size, Value *Ptr, 23606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner unsigned Alignment, Instruction *Inst); 23706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 238a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson}; 239a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 240a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} // end anon namespace 241a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 242a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 24306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// addRange - Add a new store to the MemsetRanges data structure. This adds a 244a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// new range for the specified store at the specified offset, merging into 245a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// existing ranges as appropriate. 24606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// 24706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// Do a linear search of the ranges to see if this can be joined and/or to 24806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// find the insertion point in the list. We keep the ranges sorted for 24906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// simplicity here. This is a linear search of a linked list, which is ugly, 25006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner/// however the number of ranges is limited, so this won't get crazy slow. 25106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattnervoid MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, 25206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner unsigned Alignment, Instruction *Inst) { 25306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t End = Start+Size; 254a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson range_iterator I = Ranges.begin(), E = Ranges.end(); 255a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 256a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (I != E && Start > I->End) 257a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ++I; 258a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 259a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // We now know that I == E, in which case we didn't find anything to merge 260a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // with, or that Start <= I->End. If End < I->Start or I == E, then we need 261a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // to insert a new range. Handle this now. 262a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (I == E || End < I->Start) { 263a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson MemsetRange &R = *Ranges.insert(I, MemsetRange()); 264a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson R.Start = Start; 265a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson R.End = End; 26606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.StartPtr = Ptr; 26706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.Alignment = Alignment; 26806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.TheStores.push_back(Inst); 269a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return; 270a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 271a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 272a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // This store overlaps with I, add it. 27306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->TheStores.push_back(Inst); 274a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 275a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // At this point, we may have an interval that completely contains our store. 276a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If so, just add it to the interval and return. 277a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (I->Start <= Start && I->End >= End) 278a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return; 279a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 280a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now we know that Start <= I->End and End >= I->Start so the range overlaps 281a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // but is not entirely contained within the range. 282a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 283a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // See if the range extends the start of the range. In this case, it couldn't 284a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // possibly cause it to join the prior range, because otherwise we would have 285a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // stopped on *it*. 286a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Start < I->Start) { 287a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->Start = Start; 28806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->StartPtr = Ptr; 28906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->Alignment = Alignment; 290a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 291a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 292a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now we know that Start <= I->End and Start >= I->Start (so the startpoint 293a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // is in or right at the end of I), and that End >= I->Start. Extend I out to 294a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // End. 295a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (End > I->End) { 296a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->End = End; 2979c0f146d50ccc3ba780d4854b8e14422430013efNick Lewycky range_iterator NextI = I; 298a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (++NextI != E && End >= NextI->Start) { 299a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Merge the range in. 300a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); 301a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (NextI->End > I->End) 302a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->End = NextI->End; 303a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Ranges.erase(NextI); 304a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson NextI = I; 305a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 306a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 307a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 308a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 309a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 310a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// MemCpyOpt Pass 311a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 312a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 313a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 3143e8b6631e67e01e4960a7ba4668a50c596607473Chris Lattner class MemCpyOpt : public FunctionPass { 3152f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemoryDependenceAnalysis *MD; 316149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner TargetLibraryInfo *TLI; 31736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const DataLayout *DL; 318a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson public: 319a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson static char ID; // Pass identification, replacement for typeid 320081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson MemCpyOpt() : FunctionPass(ID) { 321081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); 322dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MD = nullptr; 323dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines TLI = nullptr; 324dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DL = nullptr; 325081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson } 326a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 32736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnFunction(Function &F) override; 32867a716ab818301fe28068754c879e568c44e62f8Chris Lattner 329a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson private: 330a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // This transformation requires dominator postdominator info 33136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 332a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.setPreservesCFG(); 33337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AU.addRequired<AssumptionTracker>(); 33436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addRequired<DominatorTreeWrapperPass>(); 335a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addRequired<MemoryDependenceAnalysis>(); 336a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addRequired<AliasAnalysis>(); 337149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner AU.addRequired<TargetLibraryInfo>(); 338a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addPreserved<AliasAnalysis>(); 339a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.addPreserved<MemoryDependenceAnalysis>(); 340a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 341a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 342a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Helper fuctions 34361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); 344d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); 34561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool processMemCpy(MemCpyInst *M); 346f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner bool processMemMove(MemMoveInst *M); 3476549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, 348f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands uint64_t cpyLen, unsigned cpyAlign, CallInst *C); 34943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, 35043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner uint64_t MSize); 3512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner bool processByValArgument(CallSite CS, unsigned ArgNo); 35267a716ab818301fe28068754c879e568c44e62f8Chris Lattner Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, 35367a716ab818301fe28068754c879e568c44e62f8Chris Lattner Value *ByteVal); 35467a716ab818301fe28068754c879e568c44e62f8Chris Lattner 355a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool iterateOnFunction(Function &F); 356a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson }; 357a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 358a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson char MemCpyOpt::ID = 0; 359a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 360a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 361a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// createMemCpyOptPass - The public interface to this file... 362a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonFunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } 363a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 3642ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", 3652ab36d350293c77fc8941ce1023e4899df7e3a82Owen Anderson false, false) 36637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesINITIALIZE_PASS_DEPENDENCY(AssumptionTracker) 36736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesINITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 3682ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 369149f5283f93ec85b96888c284f56099a72cc2731Chris LattnerINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 3702ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 3712ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", 3722ab36d350293c77fc8941ce1023e4899df7e3a82Owen Anderson false, false) 373a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 37467a716ab818301fe28068754c879e568c44e62f8Chris Lattner/// tryMergingIntoMemset - When scanning forward over instructions, we look for 375a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// some other patterns to fold away. In particular, this looks for stores to 376ab4c366274a582dd8146b2820c6b999cad5fce36Duncan Sands/// neighboring locations of memory. If it sees enough consecutive ones, it 37767a716ab818301fe28068754c879e568c44e62f8Chris Lattner/// attempts to merge them together into a memcpy/memset. 378a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav RotemInstruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, 37967a716ab818301fe28068754c879e568c44e62f8Chris Lattner Value *StartPtr, Value *ByteVal) { 380dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DL) return nullptr; 381a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 382a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Okay, so we now have a single store that can be splatable. Scan to find 383a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // all subsequent stores of the same value to offset from the same pointer. 384a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Join these together into ranges, so we can decide whether contiguous blocks 385a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // are stored. 38636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MemsetRanges Ranges(*DL); 387a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 38867a716ab818301fe28068754c879e568c44e62f8Chris Lattner BasicBlock::iterator BI = StartInst; 389a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (++BI; !isa<TerminatorInst>(BI); ++BI) { 39006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { 39106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If the instruction is readnone, ignore it, otherwise bail out. We 39206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // don't even allow readonly here because we don't want something like: 393a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 39406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) 39506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 39606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner continue; 39706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 398a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 39906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { 40006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If this is a store, see if we can merge it in. 40156efe24431b045be120d1fd5f6b0aa43a6b01c48Eli Friedman if (!NextStore->isSimple()) break; 402a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 40306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this stored value is of the same byte-splattable value. 40406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) 40506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 406a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 40706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this store is to a constant offset from the start ptr. 40806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Offset; 409f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), 41036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Offset, *DL)) 41106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 412a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 41306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner Ranges.addStore(Offset, NextStore); 41406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } else { 41506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner MemSetInst *MSI = cast<MemSetInst>(BI); 416a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 41706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (MSI->isVolatile() || ByteVal != MSI->getValue() || 41806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner !isa<ConstantInt>(MSI->getLength())) 41906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 420a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 42106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this store is to a constant offset from the start ptr. 42206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Offset; 42336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL)) 42406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 425a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 42606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner Ranges.addMemSet(Offset, MSI); 42706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 428a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 429a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 430a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we have no ranges, then we just had a single store with nothing that 431a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // could be merged in. This is a very common case of course. 432a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Ranges.empty()) 433dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 434a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 435a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we had at least one store that could be merged in, add the starting 436a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // store as well. We try to avoid this unless there is at least something 437a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // interesting as a small compile-time optimization. 43867a716ab818301fe28068754c879e568c44e62f8Chris Lattner Ranges.addInst(0, StartInst); 43967a716ab818301fe28068754c879e568c44e62f8Chris Lattner 44067a716ab818301fe28068754c879e568c44e62f8Chris Lattner // If we create any memsets, we put it right before the first instruction that 44167a716ab818301fe28068754c879e568c44e62f8Chris Lattner // isn't part of the memset block. This ensure that the memset is dominated 44267a716ab818301fe28068754c879e568c44e62f8Chris Lattner // by any addressing instruction needed by the start of the block. 44367a716ab818301fe28068754c879e568c44e62f8Chris Lattner IRBuilder<> Builder(BI); 44467a716ab818301fe28068754c879e568c44e62f8Chris Lattner 445a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now that we have full information about ranges, loop over the ranges and 446a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // emit memset's for anything big enough to be worthwhile. 447dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Instruction *AMemSet = nullptr; 448a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 449a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I != E; ++I) { 450a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const MemsetRange &Range = *I; 451a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 452a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Range.TheStores.size() == 1) continue; 453a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 454a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If it is profitable to lower this range to memset, do so now. 45536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!Range.isProfitableToUseMemset(*DL)) 456a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson continue; 457a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 45867a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Otherwise, we do want to transform this! Create a new memset. 459a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Get the starting pointer of the block. 460a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson StartPtr = Range.StartPtr; 461a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 46220adc9dc4650313f017b27d9818eb2176238113dMon P Wang // Determine alignment 46320adc9dc4650313f017b27d9818eb2176238113dMon P Wang unsigned Alignment = Range.Alignment; 46420adc9dc4650313f017b27d9818eb2176238113dMon P Wang if (Alignment == 0) { 465a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem Type *EltType = 46667a716ab818301fe28068754c879e568c44e62f8Chris Lattner cast<PointerType>(StartPtr->getType())->getElementType(); 46736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Alignment = DL->getABITypeAlignment(EltType); 46820adc9dc4650313f017b27d9818eb2176238113dMon P Wang } 469a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 470a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem AMemSet = 47161db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); 472a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 473cb33fd17cce475a1d47b2695e311b6934ad0ef86David Greene DEBUG(dbgs() << "Replace stores:\n"; 474a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) 4752f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner dbgs() << *Range.TheStores[i] << '\n'; 47667a716ab818301fe28068754c879e568c44e62f8Chris Lattner dbgs() << "With: " << *AMemSet << '\n'); 477b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel 478b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel if (!Range.TheStores.empty()) 479b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); 480b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel 481a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Zap all the stores. 482365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper for (SmallVectorImpl<Instruction *>::const_iterator 483ff1e98c72ae5f2aa805112925fd5c06049aa8e79Chris Lattner SI = Range.TheStores.begin(), 4848a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner SE = Range.TheStores.end(); SI != SE; ++SI) { 4858a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner MD->removeInstruction(*SI); 486a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson (*SI)->eraseFromParent(); 4878a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner } 488a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ++NumMemSetInfer; 489a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 490a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 49167a716ab818301fe28068754c879e568c44e62f8Chris Lattner return AMemSet; 49267a716ab818301fe28068754c879e568c44e62f8Chris Lattner} 49367a716ab818301fe28068754c879e568c44e62f8Chris Lattner 49467a716ab818301fe28068754c879e568c44e62f8Chris Lattner 49567a716ab818301fe28068754c879e568c44e62f8Chris Lattnerbool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { 49656efe24431b045be120d1fd5f6b0aa43a6b01c48Eli Friedman if (!SI->isSimple()) return false; 497a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 498dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DL) return false; 49967a716ab818301fe28068754c879e568c44e62f8Chris Lattner 50067a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Detect cases where we're performing call slot forwarding, but 50167a716ab818301fe28068754c879e568c44e62f8Chris Lattner // happen to be using a load-store pair to implement it, rather than 50267a716ab818301fe28068754c879e568c44e62f8Chris Lattner // a memcpy. 50367a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { 50456efe24431b045be120d1fd5f6b0aa43a6b01c48Eli Friedman if (LI->isSimple() && LI->hasOneUse() && 5055d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman LI->getParent() == SI->getParent()) { 50670d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman MemDepResult ldep = MD->getDependency(LI); 507dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *C = nullptr; 50870d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) 50970d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman C = dyn_cast<CallInst>(ldep.getInst()); 51070d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman 51170d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman if (C) { 51270d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman // Check that nothing touches the dest of the "copy" between 51370d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman // the call and the store. 5145d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 5155d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman AliasAnalysis::Location StoreLoc = AA.getLocation(SI); 5165d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman for (BasicBlock::iterator I = --BasicBlock::iterator(SI), 5175d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman E = C; I != E; --I) { 5185d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { 519dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines C = nullptr; 5205d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman break; 5215d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman } 52270d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman } 52370d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman } 52470d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman 52567a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (C) { 526f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands unsigned storeAlign = SI->getAlignment(); 527f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands if (!storeAlign) 52836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType()); 529f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands unsigned loadAlign = LI->getAlignment(); 530f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands if (!loadAlign) 53136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines loadAlign = DL->getABITypeAlignment(LI->getType()); 532f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands 53367a716ab818301fe28068754c879e568c44e62f8Chris Lattner bool changed = performCallSlotOptzn(LI, 534a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem SI->getPointerOperand()->stripPointerCasts(), 53567a716ab818301fe28068754c879e568c44e62f8Chris Lattner LI->getPointerOperand()->stripPointerCasts(), 53636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DL->getTypeStoreSize(SI->getOperand(0)->getType()), 537f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands std::min(storeAlign, loadAlign), C); 53867a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (changed) { 53967a716ab818301fe28068754c879e568c44e62f8Chris Lattner MD->removeInstruction(SI); 54067a716ab818301fe28068754c879e568c44e62f8Chris Lattner SI->eraseFromParent(); 541f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner MD->removeInstruction(LI); 54267a716ab818301fe28068754c879e568c44e62f8Chris Lattner LI->eraseFromParent(); 54367a716ab818301fe28068754c879e568c44e62f8Chris Lattner ++NumMemCpyInstr; 54467a716ab818301fe28068754c879e568c44e62f8Chris Lattner return true; 54567a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 54667a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 54767a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 54867a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 549a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 55067a716ab818301fe28068754c879e568c44e62f8Chris Lattner // There are two cases that are interesting for this code to handle: memcpy 55167a716ab818301fe28068754c879e568c44e62f8Chris Lattner // and memset. Right now we only handle memset. 552a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 55367a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Ensure that the value being stored is something that can be memset'able a 55467a716ab818301fe28068754c879e568c44e62f8Chris Lattner // byte at a time like "0" or "-1" or any width, as well as things like 55567a716ab818301fe28068754c879e568c44e62f8Chris Lattner // 0xA0A0A0A0 and 0.0. 55667a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (Value *ByteVal = isBytewiseValue(SI->getOperand(0))) 55767a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), 55867a716ab818301fe28068754c879e568c44e62f8Chris Lattner ByteVal)) { 55967a716ab818301fe28068754c879e568c44e62f8Chris Lattner BBI = I; // Don't invalidate iterator. 56067a716ab818301fe28068754c879e568c44e62f8Chris Lattner return true; 56167a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 562a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 56367a716ab818301fe28068754c879e568c44e62f8Chris Lattner return false; 564a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 565a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 566d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattnerbool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { 567d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner // See if there is another memset or store neighboring this memset which 568d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner // allows us to widen out the memset to do a single larger store. 5690468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) 5700468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), 5710468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner MSI->getValue())) { 5720468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner BBI = I; // Don't invalidate iterator. 5730468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner return true; 5740468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner } 575d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner return false; 576d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner} 577d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner 578a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 579a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// performCallSlotOptzn - takes a memcpy and a call that it depends on, 580a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// and checks for the possibility of a call slot optimization by having 581a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// the call write its result directly into the destination of the memcpy. 5826549121c660dfd18361cd3daf6c766bee80d3097Owen Andersonbool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, 5836549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson Value *cpyDest, Value *cpySrc, 584f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands uint64_t cpyLen, unsigned cpyAlign, 585f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands CallInst *C) { 586a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The general transformation to keep in mind is 587a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 588a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // call @func(..., src, ...) 589a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy(dest, src, ...) 590a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 591a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // -> 592a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 593a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy(dest, src, ...) 594a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // call @func(..., dest, ...) 595a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 596a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since moving the memcpy is technically awkward, we additionally check that 597a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // src only holds uninitialized values at the moment of the call, meaning that 598a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the memcpy can be discarded rather than moved. 599a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 600a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Deliberately get the source and destination with bitcasts stripped away, 601a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // because we'll need to do type comparisons based on the underlying type. 6027d3056b16038a6a09c452c0dfcc3c8f4e421506aGabor Greif CallSite CS(C); 603a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 604a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Require that src be an alloca. This simplifies the reasoning considerably. 60561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); 606a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!srcAlloca) 607a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 608a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 609a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that all of src is copied to dest. 610dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DL) return false; 611a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 61261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); 613a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!srcArraySize) 614a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 615a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 61636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) * 617a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson srcArraySize->getZExtValue(); 618a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 6196549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson if (cpyLen < srcSize) 620a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 621a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 622a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that accessing the first srcSize bytes of dest will not cause a 623a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // trap. Otherwise the transform is invalid since it might cause a trap 624a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // to occur earlier than it otherwise would. 62561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { 626a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The destination is an alloca. Check it is larger than srcSize. 62761c6ba85715fdcb66f746678879984151f1e5485Chris Lattner ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); 628a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!destArraySize) 629a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 630a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 63136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) * 632a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson destArraySize->getZExtValue(); 633a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 634a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (destSize < srcSize) 635a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 63661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { 63737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (A->getDereferenceableBytes() < srcSize) { 63837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // If the destination is an sret parameter then only accesses that are 63937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // outside of the returned struct type can trap. 64037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!A->hasStructRetAttr()) 64137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 642a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 64337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Type *StructTy = cast<PointerType>(A->getType())->getElementType(); 64437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!StructTy->isSized()) { 64537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The call may never return and hence the copy-instruction may never 64637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // be executed, and therefore it's not safe to say "the destination 64737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // has at least <cpyLen> bytes, as implied by the copy-instruction", 64837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 64937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 6509792b646c68d0dcee4049662091f1496b4c85ce7Shuxin Yang 65137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines uint64_t destSize = DL->getTypeAllocSize(StructTy); 65237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (destSize < srcSize) 65337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 65437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 655a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } else { 656a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 657a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 658a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 6593372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands // Check that dest points to memory that is at least as aligned as src. 6603372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands unsigned srcAlign = srcAlloca->getAlignment(); 6613372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands if (!srcAlign) 66236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType()); 6633372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands bool isDestSufficientlyAligned = srcAlign <= cpyAlign; 6643372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands // If dest is not aligned enough and we can't increase its alignment then 6653372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands // bail out. 6663372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) 6673372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands return false; 6683372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands 669a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that src is not accessed except via the call and the memcpy. This 670a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // guarantees that it holds only undefined values when passed in (so the final 671a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy can be dropped), that it is not read or written between the call and 672a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the memcpy, and that writing beyond the end of it is undefined. 67336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(), 67436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines srcAlloca->user_end()); 675a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (!srcUseList.empty()) { 67636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines User *U = srcUseList.pop_back_val(); 677a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 67836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { 67936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *UU : U->users()) 68036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines srcUseList.push_back(UU); 68137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 68237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 68337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { 68437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!G->hasAllZeroIndices()) 685009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson return false; 68637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 68737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (User *UU : U->users()) 68837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines srcUseList.push_back(UU); 68937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 690a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 69137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) 69237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (IT->getIntrinsicID() == Intrinsic::lifetime_start || 69337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines IT->getIntrinsicID() == Intrinsic::lifetime_end) 69437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 69537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 69637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (U != C && U != cpy) 69737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 698a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 699a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 70037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check that src isn't captured by the called function since the 70137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // transformation can cause aliasing issues in that case. 70237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 70337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i)) 70437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 70537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 706a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since we're changing the parameter to the callsite, we need to make sure 707a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // that what would be the new parameter dominates the callsite. 70836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 70961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) 710a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!DT.dominates(cpyDestInst, C)) 711a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 712a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 713a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // In addition to knowing that the call does not access src in some 714a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // unexpected manner, for example via a global, which we deduce from 715a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the use analysis, we also need to know that it does not sneakily 716a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // access dest. We rely on AA to figure this out for us. 71761c6ba85715fdcb66f746678879984151f1e5485Chris Lattner AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 7183a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize); 7193a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier // If necessary, perform additional analysis. 7203a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier if (MR != AliasAnalysis::NoModRef) 7213a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT); 7223a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier if (MR != AliasAnalysis::NoModRef) 723a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 724a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 725a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // All the checks have passed, so do the transformation. 72612cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson bool changedArgument = false; 727a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 0; i < CS.arg_size(); ++i) 728009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { 7297508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest 7307508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), 7317508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands cpyDest->getName(), C); 73212cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson changedArgument = true; 7337508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands if (CS.getArgument(i)->getType() == Dest->getType()) 7347508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands CS.setArgument(i, Dest); 73561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner else 7367508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands CS.setArgument(i, CastInst::CreatePointerCast(Dest, 7377508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands CS.getArgument(i)->getType(), Dest->getName(), C)); 738a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 739a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 74012cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson if (!changedArgument) 74112cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson return false; 74212cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson 743f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands // If the destination wasn't sufficiently aligned then increase its alignment. 744f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands if (!isDestSufficientlyAligned) { 745f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!"); 746f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); 747f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands } 748f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands 749a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Drop any cached information about the call, because we may have changed 750a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // its dependence information by changing its parameter. 7512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(C); 752a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 7532f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Remove the memcpy. 7542f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(cpy); 755fe60104ac97f3a8736dcfbfdf9547c7b7cc7b951Dan Gohman ++NumMemCpyInstr; 756a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 757a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return true; 758a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 759a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 76043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// processMemCpyMemCpyDependence - We've found that the (upward scanning) 76143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to 76243f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// copy from MDep's input if we can. MSize is the size of M's copy. 763a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem/// 76443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattnerbool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, 76543f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner uint64_t MSize) { 766a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // We can only transforms memcpy's where the dest of one is the source of the 76743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // other. 7682f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (M->getSource() != MDep->getDest() || MDep->isVolatile()) 769a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 770a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 771f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // If dep instruction is reading from our current input, then it is a noop 772f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // transfer and substituting the input won't change this instruction. Just 773f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // ignore the input and let someone else zap MDep. This handles cases like: 774f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // memcpy(a <- a) 775f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // memcpy(b <- a) 776f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner if (M->getSource() == MDep->getSource()) 777f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner return false; 778a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 7797a2bdde0a0eebcd2125055e0eacaca040f0b766cChris Lattner // Second, the length of the memcpy's must be the same, or the preceding one 780a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // must be larger than the following one. 7818fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); 7828fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); 7838fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) 7848fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman return false; 785a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 7862f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 787604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner 788604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Verify that the copied-from memory doesn't change in between the two 789604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // transfers. For example, in: 790604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // memcpy(a <- b) 791604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // *b = 42; 792604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // memcpy(c <- a) 793604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // It would be invalid to transform the second memcpy into memcpy(c <- b). 794604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 795604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // TODO: If the code between M and MDep is transparent to the destination "c", 796604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // then we could still perform the xform by moving M up to the first memcpy. 797604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 798604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // NOTE: This is conservative, it will stop on any read from the source loc, 799604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // not just the defining memcpy. 800604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MemDepResult SourceDep = 801604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), 802604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner false, M, M->getParent()); 803604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 804604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner return false; 805a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 8065a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // If the dest of the second might alias the source of the first, then the 8075a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // source and dest might overlap. We still want to eliminate the intermediate 8085a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // value, but we have to generate a memmove instead of memcpy. 80961db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner bool UseMemMove = false; 81061db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) 81161db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner UseMemMove = true; 812a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 8132f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // If all checks passed, then we can transform M. 814a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 81504fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // Make sure to use the lesser of the alignment of the source and the dest 81604fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // since we're changing where we're reading from, but don't want to increase 81704fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // the alignment past what can be read from or written to. 818c69a00047013a0e2e07ae44c38e013a7d905b10eEric Christopher // TODO: Is this worth it if we're creating a less aligned memcpy? For 819c69a00047013a0e2e07ae44c38e013a7d905b10eEric Christopher // example we could be moving from movaps -> movq on x86. 820d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); 821a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 82261db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner IRBuilder<> Builder(M); 82361db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (UseMemMove) 82461db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), 82561db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Align, M->isVolatile()); 82661db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner else 82761db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), 82861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Align, M->isVolatile()); 829d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner 830604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Remove the instruction we're replacing. 8312f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(M); 832d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner M->eraseFromParent(); 833d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner ++NumMemCpyInstr; 834d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner return true; 835a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 836a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 83743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 83843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// processMemCpy - perform simplification of memcpy's. If we have memcpy A 83943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite 84043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// B to be a memcpy from X to Z (or potentially a memmove, depending on 84143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// circumstances). This allows later passes to remove the first memcpy 84243f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// altogether. 84343f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattnerbool MemCpyOpt::processMemCpy(MemCpyInst *M) { 84436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We can only optimize non-volatile memcpy's. 84536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (M->isVolatile()) return false; 84643f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 8478fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner // If the source and destination of the memcpy are the same, then zap it. 8488fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner if (M->getSource() == M->getDest()) { 8498fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner MD->removeInstruction(M); 8508fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner M->eraseFromParent(); 8518fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner return false; 8528fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner } 853a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer 854a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer // If copying from a constant, try to turn the memcpy into a memset. 85549c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) 8563fed0d917d23f4151cb8d98f122edd3229daf6abBenjamin Kramer if (GV->isConstant() && GV->hasDefinitiveInitializer()) 85749c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { 85861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner IRBuilder<> Builder(M); 85936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), 86061db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner M->getAlignment(), false); 86149c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer MD->removeInstruction(M); 86249c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer M->eraseFromParent(); 86349c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer ++NumCpyToSet; 86449c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer return true; 86549c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer } 866a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer 86736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // The optimizations after this point require the memcpy size. 86836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); 869dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!CopySize) return false; 87036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 87136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // The are three possible optimizations we can do for memcpy: 87243f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // a) memcpy-memcpy xform which exposes redundance for DSE. 87343f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // b) call-memcpy xform for return slot optimization. 87436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // c) memcpy from freshly alloca'd space or space that has just started its 87536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // lifetime copies undefined data, and we can therefore eliminate the 87636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // memcpy in favor of the data that was already at the destination. 8772f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemDepResult DepInfo = MD->getDependency(M); 87836c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (DepInfo.isClobber()) { 87936c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { 88036c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (performCallSlotOptzn(M, M->getDest(), M->getSource(), 881f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands CopySize->getZExtValue(), M->getAlignment(), 882f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands C)) { 88336c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky MD->removeInstruction(M); 88436c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky M->eraseFromParent(); 88536c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky return true; 88636c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky } 8878fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner } 88843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner } 889b83a67e1e3fe210bd99a82eccd3dc5b1b44f1503Ahmed Charles 890b83a67e1e3fe210bd99a82eccd3dc5b1b44f1503Ahmed Charles AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M); 89136c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true, 89236c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky M, M->getParent()); 89336c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (SrcDepInfo.isClobber()) { 89436c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) 89536c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); 89636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (SrcDepInfo.isDef()) { 89736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Instruction *I = SrcDepInfo.getInst(); 89836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool hasUndefContents = false; 89936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 90036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isa<AllocaInst>(I)) { 90136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines hasUndefContents = true; 90236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 90336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (II->getIntrinsicID() == Intrinsic::lifetime_start) 90436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) 90536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (LTSize->getZExtValue() >= CopySize->getZExtValue()) 90636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines hasUndefContents = true; 90736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 90836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 90936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (hasUndefContents) { 91036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MD->removeInstruction(M); 91136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines M->eraseFromParent(); 91236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumMemCpyInstr; 91336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 91436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 91536c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky } 91636c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky 91743f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner return false; 91843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner} 91943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 920f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst 921f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner/// are guaranteed not to alias. 922f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattnerbool MemCpyOpt::processMemMove(MemMoveInst *M) { 923f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 924f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 925149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner if (!TLI->has(LibFunc::memmove)) 926149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner return false; 927a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 928f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // See if the pointers alias. 92961db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) 930f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner return false; 931a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 932cb33fd17cce475a1d47b2695e311b6934ad0ef86David Greene DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); 933a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 934f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // If not, then we know we can transform this. 935f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner Module *Mod = M->getParent()->getParent()->getParent(); 9365fdd6c8793462549e3593890ec61573da06e3346Jay Foad Type *ArgTys[3] = { M->getRawDest()->getType(), 9375fdd6c8793462549e3593890ec61573da06e3346Jay Foad M->getRawSource()->getType(), 9385fdd6c8793462549e3593890ec61573da06e3346Jay Foad M->getLength()->getType() }; 939a399781289092fcdceb58b21174229f4373c4191Gabor Greif M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, 940eb9a85f09e18b3fe88499710404b38d3a9128f62Benjamin Kramer ArgTys)); 94105cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands 942f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // MemDep may have over conservative information about this instruction, just 943f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // conservatively flush it from the cache. 9442f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(M); 94505cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands 94605cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands ++NumMoveToCpy; 947f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner return true; 948f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner} 949a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 9502f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner/// processByValArgument - This is called on every byval argument in call sites. 9512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattnerbool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { 952dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DL) return false; 953f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 954604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Find out what feeds this byval argument. 9552f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner Value *ByValArg = CS.getArgument(ArgNo); 956865703e53afb258805c9e35ea3cb346fa7eff115Nick Lewycky Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); 95736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t ByValSize = DL->getTypeAllocSize(ByValTy); 958604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MemDepResult DepInfo = 959604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), 960604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner true, CS.getInstruction(), 961604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner CS.getInstruction()->getParent()); 9622f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (!DepInfo.isClobber()) 9632f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 9642f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 9652f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by 9662f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // a memcpy, see if we can byval from the source of the memcpy instead of the 9672f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // result. 9682f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); 969dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!MDep || MDep->isVolatile() || 9702f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ByValArg->stripPointerCasts() != MDep->getDest()) 9712f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 972a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 9732f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // The length of the memcpy must be larger or equal to the size of the byval. 9742f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); 975dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!C1 || C1->getValue().getZExtValue() < ByValSize) 9762f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 9772f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 978b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // Get the alignment of the byval. If the call doesn't specify the alignment, 979b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // then it is some target specific value that we can't know. 9802f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); 981b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner if (ByValAlign == 0) return false; 982a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 983b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // If it is greater than the memcpy, then we check to see if we can force the 984b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // source of the memcpy to the alignment we need. If we fail, we bail out. 98537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AssumptionTracker *AT = &getAnalysis<AssumptionTracker>(); 98637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 987b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner if (MDep->getAlignment() < ByValAlign && 98837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, 98937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DL, AT, CS.getInstruction(), &DT) < ByValAlign) 990b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner return false; 991a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 9922f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Verify that the copied-from memory doesn't change in between the memcpy and 9932f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // the byval call. 9942f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // memcpy(a <- b) 9952f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // *b = 42; 9962f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // foo(*a) 9972f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // It would be invalid to transform the second memcpy into foo(*b). 998604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 999604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // NOTE: This is conservative, it will stop on any read from the source loc, 1000604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // not just the defining memcpy. 1001604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MemDepResult SourceDep = 1002604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), 1003604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner false, CS.getInstruction(), MDep->getParent()); 1004604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 1005604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner return false; 1006a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 10072f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner Value *TmpCast = MDep->getSource(); 10082f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (MDep->getSource()->getType() != ByValArg->getType()) 10092f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), 10102f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner "tmpcast", CS.getInstruction()); 1011a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 10122f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" 10132f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner << " " << *MDep << "\n" 10142f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner << " " << *CS.getInstruction() << "\n"); 1015a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 10162f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Otherwise we're good! Update the byval argument. 10172f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner CS.setArgument(ArgNo, TmpCast); 10182f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ++NumMemCpyInstr; 10192f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return true; 10202f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner} 10212f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 10222f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner/// iterateOnFunction - Executes one iteration of MemCpyOpt. 1023a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonbool MemCpyOpt::iterateOnFunction(Function &F) { 102461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool MadeChange = false; 1025a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 102661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner // Walk all instruction in the function. 1027a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { 10282f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { 102961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner // Avoid invalidating the iterator. 103061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner Instruction *I = BI++; 1031a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 10322f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner bool RepeatInstruction = false; 1033a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1034a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson if (StoreInst *SI = dyn_cast<StoreInst>(I)) 103561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner MadeChange |= processStore(SI, BI); 1036d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) 1037d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner RepeatInstruction = processMemSet(M, BI); 1038d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) 10392f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner RepeatInstruction = processMemCpy(M); 1040d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) 10412f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner RepeatInstruction = processMemMove(M); 1042d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (CallSite CS = (Value*)I) { 10432f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 1044173862e5468fbcf4b022b9088d2c81b25c2d60c5Nick Lewycky if (CS.isByValArgument(i)) 10452f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MadeChange |= processByValArgument(CS, i); 10462f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner } 10472f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 10482f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Reprocess the instruction if desired. 10492f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (RepeatInstruction) { 10508a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner if (BI != BB->begin()) --BI; 10512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MadeChange = true; 1052f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner } 1053a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 1054a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 1055a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 105661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner return MadeChange; 1057a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 105861c6ba85715fdcb66f746678879984151f1e5485Chris Lattner 105961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner// MemCpyOpt::runOnFunction - This is the main transformation entry point for a 106061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner// function. 106161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner// 106261c6ba85715fdcb66f746678879984151f1e5485Chris Lattnerbool MemCpyOpt::runOnFunction(Function &F) { 106336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (skipOptnoneFunction(F)) 106436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 106536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 106661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool MadeChange = false; 10672f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD = &getAnalysis<MemoryDependenceAnalysis>(); 106836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 1069dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DL = DLP ? &DLP->getDataLayout() : nullptr; 1070149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner TLI = &getAnalysis<TargetLibraryInfo>(); 1071a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1072149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // If we don't have at least memset and memcpy, there is little point of doing 1073149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // anything here. These are required by a freestanding implementation, so if 1074149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // even they are disabled, there is no point in trying hard. 1075149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) 1076149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner return false; 1077a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 107861c6ba85715fdcb66f746678879984151f1e5485Chris Lattner while (1) { 107961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (!iterateOnFunction(F)) 108061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner break; 108161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner MadeChange = true; 108261c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } 1083a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1084dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MD = nullptr; 108561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner return MadeChange; 108661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner} 1087