1a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// 2a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 3a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// The LLVM Compiler Infrastructure 4a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 5a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// This file is distributed under the University of Illinois Open Source 6a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// License. See LICENSE.TXT for details. 7a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 8a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 9a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 10a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// This pass performs various transformations related to eliminating memcpy 11a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// calls, or transforming sets of stores into memset's. 12a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson// 13a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 14a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 15de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" 16a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Transforms/Scalar.h" 17de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/ADT/DenseSet.h" 18a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/ADT/SmallVector.h" 19a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/ADT/Statistic.h" 20bb89710dddf967199dfc56e8bf5d28b0003f2ee6Chris Lattner#include "llvm/Analysis/ValueTracking.h" 210b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/GetElementPtrTypeIterator.h" 230b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/GlobalVariable.h" 240b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IRBuilder.h" 25a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson#include "llvm/Support/Debug.h" 26bdff548e4dd577a72094d57b282de4e765643b96Chris Lattner#include "llvm/Support/raw_ostream.h" 2706cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Transforms/Utils/Local.h" 28f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar#include <algorithm> 29a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonusing namespace llvm; 30a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "memcpyopt" 32dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 33a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonSTATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); 34a723d1e48f4a261512c28845c53eda569fa5218cOwen AndersonSTATISTIC(NumMemSetInfer, "Number of memsets inferred"); 3505cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan SandsSTATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); 36a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin KramerSTATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); 37a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 3839acdb0200ff78065699509fccfc605f86237350Benjamin Kramerstatic int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, 394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool &VariableIdxFound, 404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL) { 41a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Skip over the first indices. 42a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson gep_type_iterator GTI = gep_type_begin(GEP); 43a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 1; i != Idx; ++i, ++GTI) 44a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /*skip along*/; 45a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 46a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Compute the offset implied by the rest of the indices. 47a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Offset = 0; 48a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 49a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); 50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!OpC) 51a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return VariableIdxFound = true; 52a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (OpC->isZero()) continue; // No offset. 53a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 54a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Handle struct indices, which add their field offset to the pointer. 55db125cfaf57cc83e7dd7453de2d509bc8efd0e5eChris Lattner if (StructType *STy = dyn_cast<StructType>(*GTI)) { 564c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); 57a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson continue; 58a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 59a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 60a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Otherwise, we have a sequential type like an array or vector. Multiply 61a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the index by the ElementSize. 624c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); 63a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset += Size*OpC->getSExtValue(); 64a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 65a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 66a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return Offset; 67a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 68a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 69f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and 70f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2 71f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// might be &A[40]. In this case offset would be -8. 72a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstatic bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, 734c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL) { 742d5c0cd197454408531cd53e4dd65a431e07ba6fChris Lattner Ptr1 = Ptr1->stripPointerCasts(); 752d5c0cd197454408531cd53e4dd65a431e07ba6fChris Lattner Ptr2 = Ptr2->stripPointerCasts(); 7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 7736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Handle the trivial case first. 7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Ptr1 == Ptr2) { 7936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Offset = 0; 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8339acdb0200ff78065699509fccfc605f86237350Benjamin Kramer GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1); 8439acdb0200ff78065699509fccfc605f86237350Benjamin Kramer GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2); 85a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 869fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner bool VariableIdxFound = false; 879fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 889fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner // If one pointer is a GEP and the other isn't, then see if the GEP is a 899fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner // constant offset from the base, as in "P" and "gep P, 1". 90dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { 914c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL); 929fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner return !VariableIdxFound; 939fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner } 949fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner 95dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { 964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL); 979fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner return !VariableIdxFound; 989fa11e94b5a7709cf05396420b3b3eaad6fb8e37Chris Lattner } 99a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 100a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical 101a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // base. After that base, they may have some number of common (and 102a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // potentially variable) indices. After that they handle some constant 103a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // offset, which determines their offset from each other. At this point, we 104a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // handle no other case. 105a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) 106a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 107a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 108a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Skip any common indices and track the GEP types. 109a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Idx = 1; 110a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) 111a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) 112a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson break; 113a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 1144c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL); 1154c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL); 116a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (VariableIdxFound) return false; 117a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 118a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Offset = Offset2-Offset1; 119a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return true; 120a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 121a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 122a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 123f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Represents a range of memset'd bytes with the ByteVal value. 124a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// This allows us to analyze stores like: 125a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+1 126a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+0 127a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+3 128a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// store 0 -> P+2 129a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// which sometimes happens with stores to arrays of structs etc. When we see 130a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// the first store, we make a range [1, 2). The second store extends the range 131a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the 132a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// two ranges into [0, 3) which is memset'able. 133a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 134a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonstruct MemsetRange { 135a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Start/End - A semi range that describes the span that this range covers. 136a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem // The range is closed at the start and open at the end: [Start, End). 137a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson int64_t Start, End; 138a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 139a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// StartPtr - The getelementptr instruction that points to the start of the 140a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// range. 141a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Value *StartPtr; 142a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 143a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// Alignment - The known alignment of the first store. 144a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Alignment; 145a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 146a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson /// TheStores - The actual stores that make up this range. 14706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner SmallVector<Instruction*, 16> TheStores; 148a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1494c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool isProfitableToUseMemset(const DataLayout &DL) const; 150a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson}; 151a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} // end anon namespace 152a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 1534c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarbool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { 154a4b6fd5be08ad6a1cb2d07aba2814aaa2a626c8dChad Rosier // If we found more than 4 stores to merge or 16 bytes, use memset. 155d8bd26ee240bbc10a230d2ea8f8975d1fd32ba7cChad Rosier if (TheStores.size() >= 4 || End-Start >= 16) return true; 15606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 15706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If there is nothing to merge, don't do anything. 15806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (TheStores.size() < 2) return false; 159a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 16006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If any of the stores are a memset, then it is always good to extend the 16106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // memset. 162f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Instruction *SI : TheStores) 163f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!isa<StoreInst>(SI)) 16406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner return true; 165a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 166a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Assume that the code generator is capable of merging pairs of stores 167a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // together if it wants to. 16806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (TheStores.size() == 2) return false; 169a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 170a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we have fewer than 8 stores, it can still be worthwhile to do this. 171a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // For example, merging 4 i8 stores into an i32 store is useful almost always. 172a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the 173a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memset will be split into 2 32-bit stores anyway) and doing so can 174a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // pessimize the llvm optimizer. 175a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 176a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since we don't have perfect knowledge here, make some assumptions: assume 1774b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault // the maximum GPR width is the same size as the largest legal integer 1784b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault // size. If so, check to see whether we will end up actually reducing the 1794b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault // number of stores used. 180a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson unsigned Bytes = unsigned(End-Start); 181de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8; 1824b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault if (MaxIntSize == 0) 1834b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault MaxIntSize = 1; 1844b28ee208895d2a9c98b9e63d0c39985500e9291Matt Arsenault unsigned NumPointerStores = Bytes / MaxIntSize; 185a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 186a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Assume the remaining bytes if any are done a byte at a time. 187f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned NumByteStores = Bytes % MaxIntSize; 188a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 189a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we will reduce the # stores (according to this heuristic), do the 190a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 191a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // etc. 192a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return TheStores.size() > NumPointerStores+NumByteStores; 193a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem} 194a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 195a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 196a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 197a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonclass MemsetRanges { 198f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// A sorted list of the memset ranges. 199f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SmallVector<MemsetRange, 8> Ranges; 200f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar typedef SmallVectorImpl<MemsetRange>::iterator range_iterator; 20136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const DataLayout &DL; 202a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonpublic: 20336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MemsetRanges(const DataLayout &DL) : DL(DL) {} 204a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 205f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator; 206a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const_iterator begin() const { return Ranges.begin(); } 207a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson const_iterator end() const { return Ranges.end(); } 208a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool empty() const { return Ranges.empty(); } 209a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 21067a716ab818301fe28068754c879e568c44e62f8Chris Lattner void addInst(int64_t OffsetFromFirst, Instruction *Inst) { 21106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 21206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addStore(OffsetFromFirst, SI); 21306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner else 21406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); 21506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 21606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 21706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addStore(int64_t OffsetFromFirst, StoreInst *SI) { 21836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); 219a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 22006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addRange(OffsetFromFirst, StoreSize, 22106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner SI->getPointerOperand(), SI->getAlignment(), SI); 22206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 223a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 22406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { 22506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 22606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); 22767a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 228a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 22906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner void addRange(int64_t Start, int64_t Size, Value *Ptr, 23006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner unsigned Alignment, Instruction *Inst); 23106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner 232a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson}; 233a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 234a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} // end anon namespace 235a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 236a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 237f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Add a new store to the MemsetRanges data structure. This adds a 238a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// new range for the specified store at the specified offset, merging into 239a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// existing ranges as appropriate. 24006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattnervoid MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, 24106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner unsigned Alignment, Instruction *Inst) { 24206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t End = Start+Size; 243a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 244f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start, 245f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; }); 246a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 247a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // We now know that I == E, in which case we didn't find anything to merge 248a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // with, or that Start <= I->End. If End < I->Start or I == E, then we need 249a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // to insert a new range. Handle this now. 250f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (I == Ranges.end() || End < I->Start) { 251a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson MemsetRange &R = *Ranges.insert(I, MemsetRange()); 252a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson R.Start = Start; 253a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson R.End = End; 25406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.StartPtr = Ptr; 25506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.Alignment = Alignment; 25606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner R.TheStores.push_back(Inst); 257a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return; 258a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 259a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 260a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // This store overlaps with I, add it. 26106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->TheStores.push_back(Inst); 262a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 263a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // At this point, we may have an interval that completely contains our store. 264a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If so, just add it to the interval and return. 265a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (I->Start <= Start && I->End >= End) 266a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return; 267a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 268a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now we know that Start <= I->End and End >= I->Start so the range overlaps 269a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // but is not entirely contained within the range. 270a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 271a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // See if the range extends the start of the range. In this case, it couldn't 272a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // possibly cause it to join the prior range, because otherwise we would have 273a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // stopped on *it*. 274a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Start < I->Start) { 275a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->Start = Start; 27606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->StartPtr = Ptr; 27706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner I->Alignment = Alignment; 278a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 279a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 280a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now we know that Start <= I->End and Start >= I->Start (so the startpoint 281a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // is in or right at the end of I), and that End >= I->Start. Extend I out to 282a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // End. 283a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (End > I->End) { 284a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->End = End; 2859c0f146d50ccc3ba780d4854b8e14422430013efNick Lewycky range_iterator NextI = I; 286f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar while (++NextI != Ranges.end() && End >= NextI->Start) { 287a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Merge the range in. 288a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); 289a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (NextI->End > I->End) 290a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson I->End = NextI->End; 291a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson Ranges.erase(NextI); 292a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson NextI = I; 293a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 294a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 295a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 296a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 297a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 298de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// MemCpyOptLegacyPass Pass 299a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson//===----------------------------------------------------------------------===// 300a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 301a723d1e48f4a261512c28845c53eda569fa5218cOwen Andersonnamespace { 302de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar class MemCpyOptLegacyPass : public FunctionPass { 303de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemCpyOptPass Impl; 304a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson public: 305a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson static char ID; // Pass identification, replacement for typeid 306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemCpyOptLegacyPass() : FunctionPass(ID) { 307de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry()); 308081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson } 309a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 31036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnFunction(Function &F) override; 31167a716ab818301fe28068754c879e568c44e62f8Chris Lattner 312a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson private: 313a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // This transformation requires dominator postdominator info 31436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 315a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson AU.setPreservesCFG(); 316ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<AssumptionCacheTracker>(); 31736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addRequired<DominatorTreeWrapperPass>(); 318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AU.addRequired<MemoryDependenceWrapperPass>(); 319f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AU.addRequired<AAResultsWrapperPass>(); 320ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<TargetLibraryInfoWrapperPass>(); 321f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AU.addPreserved<GlobalsAAWrapperPass>(); 322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AU.addPreserved<MemoryDependenceWrapperPass>(); 323a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 324a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 3256948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Helper functions 32661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); 327d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); 32861c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool processMemCpy(MemCpyInst *M); 329f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner bool processMemMove(MemMoveInst *M); 3306549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, 331f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands uint64_t cpyLen, unsigned cpyAlign, CallInst *C); 3326948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); 3336948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep); 3346948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep); 3352f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner bool processByValArgument(CallSite CS, unsigned ArgNo); 33667a716ab818301fe28068754c879e568c44e62f8Chris Lattner Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, 33767a716ab818301fe28068754c879e568c44e62f8Chris Lattner Value *ByteVal); 33867a716ab818301fe28068754c879e568c44e62f8Chris Lattner 339a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson bool iterateOnFunction(Function &F); 340a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson }; 341a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 342de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar char MemCpyOptLegacyPass::ID = 0; 343a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 344a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 345f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// The public interface to this file... 346de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarFunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); } 347a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 348de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarINITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", 3492ab36d350293c77fc8941ce1023e4899df7e3a82Owen Anderson false, false) 350ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 35136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesINITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 352de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 353ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 354f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 355f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 356de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarINITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", 3572ab36d350293c77fc8941ce1023e4899df7e3a82Owen Anderson false, false) 358a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 359f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// When scanning forward over instructions, we look for some other patterns to 360f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// fold away. In particular, this looks for stores to neighboring locations of 361f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// memory. If it sees enough consecutive ones, it attempts to merge them 362f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// together into a memcpy/memset. 363de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarInstruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, 364de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *StartPtr, 365de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *ByteVal) { 3664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = StartInst->getModule()->getDataLayout(); 367a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 368a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Okay, so we now have a single store that can be splatable. Scan to find 369a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // all subsequent stores of the same value to offset from the same pointer. 370a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Join these together into ranges, so we can decide whether contiguous blocks 371a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // are stored. 3724c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MemsetRanges Ranges(DL); 373a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 374f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BasicBlock::iterator BI(StartInst); 375a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (++BI; !isa<TerminatorInst>(BI); ++BI) { 37606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { 37706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If the instruction is readnone, ignore it, otherwise bail out. We 37806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // don't even allow readonly here because we don't want something like: 379a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 38006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) 38106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 38206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner continue; 38306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 384a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 38506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { 38606511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // If this is a store, see if we can merge it in. 38756efe24431b045be120d1fd5f6b0aa43a6b01c48Eli Friedman if (!NextStore->isSimple()) break; 388a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 38906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this stored value is of the same byte-splattable value. 39006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) 39106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 392a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 39306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this store is to a constant offset from the start ptr. 39406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Offset; 3954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, 3964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DL)) 39706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 398a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 39906511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner Ranges.addStore(Offset, NextStore); 40006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } else { 40106511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner MemSetInst *MSI = cast<MemSetInst>(BI); 402a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 40306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner if (MSI->isVolatile() || ByteVal != MSI->getValue() || 40406511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner !isa<ConstantInt>(MSI->getLength())) 40506511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 406a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 40706511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner // Check to see if this store is to a constant offset from the start ptr. 40806511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner int64_t Offset; 4094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL)) 41006511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner break; 411a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 41206511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner Ranges.addMemSet(Offset, MSI); 41306511264f806cf2a55a090a555dc91a2a2e10a36Chris Lattner } 414a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 415a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 416a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we have no ranges, then we just had a single store with nothing that 417a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // could be merged in. This is a very common case of course. 418a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Ranges.empty()) 419dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 420a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 421a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If we had at least one store that could be merged in, add the starting 422a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // store as well. We try to avoid this unless there is at least something 423a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // interesting as a small compile-time optimization. 42467a716ab818301fe28068754c879e568c44e62f8Chris Lattner Ranges.addInst(0, StartInst); 42567a716ab818301fe28068754c879e568c44e62f8Chris Lattner 42667a716ab818301fe28068754c879e568c44e62f8Chris Lattner // If we create any memsets, we put it right before the first instruction that 42767a716ab818301fe28068754c879e568c44e62f8Chris Lattner // isn't part of the memset block. This ensure that the memset is dominated 42867a716ab818301fe28068754c879e568c44e62f8Chris Lattner // by any addressing instruction needed by the start of the block. 429f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar IRBuilder<> Builder(&*BI); 43067a716ab818301fe28068754c879e568c44e62f8Chris Lattner 431a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Now that we have full information about ranges, loop over the ranges and 432a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // emit memset's for anything big enough to be worthwhile. 433dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Instruction *AMemSet = nullptr; 434f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (const MemsetRange &Range : Ranges) { 435a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 436a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (Range.TheStores.size() == 1) continue; 437a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 438a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // If it is profitable to lower this range to memset, do so now. 4394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!Range.isProfitableToUseMemset(DL)) 440a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson continue; 441a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 44267a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Otherwise, we do want to transform this! Create a new memset. 443a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Get the starting pointer of the block. 444a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson StartPtr = Range.StartPtr; 445a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 44620adc9dc4650313f017b27d9818eb2176238113dMon P Wang // Determine alignment 44720adc9dc4650313f017b27d9818eb2176238113dMon P Wang unsigned Alignment = Range.Alignment; 44820adc9dc4650313f017b27d9818eb2176238113dMon P Wang if (Alignment == 0) { 449a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem Type *EltType = 45067a716ab818301fe28068754c879e568c44e62f8Chris Lattner cast<PointerType>(StartPtr->getType())->getElementType(); 4514c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Alignment = DL.getABITypeAlignment(EltType); 45220adc9dc4650313f017b27d9818eb2176238113dMon P Wang } 453a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 454a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem AMemSet = 45561db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); 456a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 457cb33fd17cce475a1d47b2695e311b6934ad0ef86David Greene DEBUG(dbgs() << "Replace stores:\n"; 458f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Instruction *SI : Range.TheStores) 459f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar dbgs() << *SI << '\n'; 46067a716ab818301fe28068754c879e568c44e62f8Chris Lattner dbgs() << "With: " << *AMemSet << '\n'); 461b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel 462b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel if (!Range.TheStores.empty()) 463b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); 464b90584ae78a7acc4ac92e3ad52121a10c520b980Devang Patel 465a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Zap all the stores. 466f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Instruction *SI : Range.TheStores) { 467f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MD->removeInstruction(SI); 468f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SI->eraseFromParent(); 4698a629577f89869f9810065dc61afd7e4302d3e46Chris Lattner } 470a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson ++NumMemSetInfer; 471a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 472a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 47367a716ab818301fe28068754c879e568c44e62f8Chris Lattner return AMemSet; 47467a716ab818301fe28068754c879e568c44e62f8Chris Lattner} 47567a716ab818301fe28068754c879e568c44e62f8Chris Lattner 476de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, 477de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const LoadInst *LI) { 478de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned StoreAlign = SI->getAlignment(); 479de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!StoreAlign) 480de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); 481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned LoadAlign = LI->getAlignment(); 482de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!LoadAlign) 483de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LoadAlign = DL.getABITypeAlignment(LI->getType()); 48467a716ab818301fe28068754c879e568c44e62f8Chris Lattner 485de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return std::min(StoreAlign, LoadAlign); 486de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 487de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 488de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// This method try to lift a store instruction before position P. 489de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// It will lift the store and its argument + that anything that 490de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// may alias with these. 491de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// The method returns true if it was successful. 492de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) { 493de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the store alias this position, early bail out. 494de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemoryLocation StoreLoc = MemoryLocation::get(SI); 495de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef) 496de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 497de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 498de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Keep track of the arguments of all instruction we plan to lift 499de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // so we can make sure to lift them as well if apropriate. 500de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DenseSet<Instruction*> Args; 501de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) 502de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Ptr->getParent() == SI->getParent()) 503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Args.insert(Ptr); 504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Instruction to lift before P. 506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Instruction*, 8> ToLift; 507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Memory locations of lifted instructions. 509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<MemoryLocation, 8> MemLocs; 510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemLocs.push_back(StoreLoc); 511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Lifted callsites. 513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<ImmutableCallSite, 8> CallSites; 514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) { 516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *C = &*I; 517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool MayAlias = AA.getModRefInfo(C) != MRI_NoModRef; 519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 520de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool NeedLift = false; 521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Args.erase(C)) 522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NeedLift = true; 523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else if (MayAlias) { 524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NeedLift = std::any_of(MemLocs.begin(), MemLocs.end(), 525de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar [C, &AA](const MemoryLocation &ML) { 526de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AA.getModRefInfo(C, ML); 527de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }); 528de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 529de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!NeedLift) 530de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NeedLift = std::any_of(CallSites.begin(), CallSites.end(), 531de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar [C, &AA](const ImmutableCallSite &CS) { 532de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AA.getModRefInfo(C, CS); 533de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }); 534de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 535de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!NeedLift) 537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 538de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MayAlias) { 540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto CS = ImmutableCallSite(C)) { 541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we can't lift this before P, it's game over. 542de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (AA.getModRefInfo(P, CS) != MRI_NoModRef) 543de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 544de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 545de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CallSites.push_back(CS); 546de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) { 547de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we can't lift this before P, it's game over. 548de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto ML = MemoryLocation::get(C); 549de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (AA.getModRefInfo(P, ML) != MRI_NoModRef) 550de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 551de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 552de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemLocs.push_back(ML); 553de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else 554de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We don't know how to lift this instruction. 555de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 556de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 558de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ToLift.push_back(C); 559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k) 560de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) 561de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (A->getParent() == SI->getParent()) 562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Args.insert(A); 563de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 564de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 565de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We made it, we need to lift 566de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *I : reverse(ToLift)) { 567de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); 568de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar I->moveBefore(P); 569de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 570de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 571de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 572de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 573de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 574de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { 57556efe24431b045be120d1fd5f6b0aa43a6b01c48Eli Friedman if (!SI->isSimple()) return false; 576f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 577f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Avoid merging nontemporal stores since the resulting 578f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // memcpy/memset would not be able to preserve the nontemporal hint. 579f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // In theory we could teach how to propagate the !nontemporal metadata to 580f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // memset calls. However, that change would force the backend to 581f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // conservatively expand !nontemporal memset calls back to sequences of 582f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // store instructions (effectively undoing the merging). 583f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SI->getMetadata(LLVMContext::MD_nontemporal)) 584f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 585f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 5864c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = SI->getModule()->getDataLayout(); 58767a716ab818301fe28068754c879e568c44e62f8Chris Lattner 588de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Load to store forwarding can be interpreted as memcpy. 58967a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { 59056efe24431b045be120d1fd5f6b0aa43a6b01c48Eli Friedman if (LI->isSimple() && LI->hasOneUse() && 5915d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman LI->getParent() == SI->getParent()) { 592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 593de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *T = LI->getType(); 594de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (T->isAggregateType()) { 595de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AliasAnalysis &AA = LookupAliasAnalysis(); 596de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemoryLocation LoadLoc = MemoryLocation::get(LI); 597de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 598de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We use alias analysis to check if an instruction may store to 599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // the memory we load from in between the load and the store. If 600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // such an instruction is found, we try to promote there instead 601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // of at the store position. 602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *P = SI; 603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { 604de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) { 605de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar P = &I; 606de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 607de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 608de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 609de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 610de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We found an instruction that may write to the loaded memory. 611de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We can try to promote at this position instead of the store 612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // position if nothing alias the store memory after this and the store 613de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // destination is not in the range. 614de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (P && P != SI) { 615de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!moveUp(AA, SI, P)) 616de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar P = nullptr; 617de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 618de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 619de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If a valid insertion position is found, then we can promote 620de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // the load/store pair to a memcpy. 621de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (P) { 622de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we load from memory that may alias the memory we store to, 623de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // memmove must be used to preserve semantic. If not, memcpy can 624de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // be used. 625de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool UseMemMove = false; 626de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) 627de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMemMove = true; 628de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 629de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Align = findCommonAlignment(DL, SI, LI); 630de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar uint64_t Size = DL.getTypeStoreSize(T); 631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 632de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar IRBuilder<> Builder(P); 633de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *M; 634de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (UseMemMove) 635de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar M = Builder.CreateMemMove(SI->getPointerOperand(), 636de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LI->getPointerOperand(), Size, 637de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Align, SI->isVolatile()); 638de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else 639de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar M = Builder.CreateMemCpy(SI->getPointerOperand(), 640de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LI->getPointerOperand(), Size, 641de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Align, SI->isVolatile()); 642de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 643de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI 644de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << " => " << *M << "\n"); 645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MD->removeInstruction(SI); 647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SI->eraseFromParent(); 648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MD->removeInstruction(LI); 649de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LI->eraseFromParent(); 650de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ++NumMemCpyInstr; 651de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 652de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Make sure we do not invalidate the iterator. 653de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BBI = M->getIterator(); 654de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 655de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 657de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 658de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Detect cases where we're performing call slot forwarding, but 659de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // happen to be using a load-store pair to implement it, rather than 660de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // a memcpy. 66170d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman MemDepResult ldep = MD->getDependency(LI); 662dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *C = nullptr; 66370d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) 66470d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman C = dyn_cast<CallInst>(ldep.getInst()); 66570d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman 66670d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman if (C) { 66770d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman // Check that nothing touches the dest of the "copy" between 66870d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman // the call and the store. 669de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *CpyDest = SI->getPointerOperand()->stripPointerCasts(); 670de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool CpyDestIsLocal = isa<AllocaInst>(CpyDest); 671de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AliasAnalysis &AA = LookupAliasAnalysis(); 672f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemoryLocation StoreLoc = MemoryLocation::get(SI); 673f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator(); 674f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar I != E; --I) { 675f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { 676dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines C = nullptr; 6775d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman break; 6785d40ef2b1df29e726cfa093fe0acd0aa97161236Eli Friedman } 679de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // The store to dest may never happen if an exception can be thrown 680de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // between the load and the store. 681de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (I->mayThrow() && !CpyDestIsLocal) { 682de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar C = nullptr; 683de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 684de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 68570d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman } 68670d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman } 68770d893e84b0de21205ad042c6c00148d0e3cd74eEli Friedman 68867a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (C) { 6894c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool changed = performCallSlotOptzn( 6904c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar LI, SI->getPointerOperand()->stripPointerCasts(), 6914c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar LI->getPointerOperand()->stripPointerCasts(), 6924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DL.getTypeStoreSize(SI->getOperand(0)->getType()), 693de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar findCommonAlignment(DL, SI, LI), C); 69467a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (changed) { 69567a716ab818301fe28068754c879e568c44e62f8Chris Lattner MD->removeInstruction(SI); 69667a716ab818301fe28068754c879e568c44e62f8Chris Lattner SI->eraseFromParent(); 697f42685004c997a4a4728cbcd9e6be1ee1d6b418fChris Lattner MD->removeInstruction(LI); 69867a716ab818301fe28068754c879e568c44e62f8Chris Lattner LI->eraseFromParent(); 69967a716ab818301fe28068754c879e568c44e62f8Chris Lattner ++NumMemCpyInstr; 70067a716ab818301fe28068754c879e568c44e62f8Chris Lattner return true; 70167a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 70267a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 70367a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 70467a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 705a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 70667a716ab818301fe28068754c879e568c44e62f8Chris Lattner // There are two cases that are interesting for this code to handle: memcpy 70767a716ab818301fe28068754c879e568c44e62f8Chris Lattner // and memset. Right now we only handle memset. 708a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 70967a716ab818301fe28068754c879e568c44e62f8Chris Lattner // Ensure that the value being stored is something that can be memset'able a 71067a716ab818301fe28068754c879e568c44e62f8Chris Lattner // byte at a time like "0" or "-1" or any width, as well as things like 71167a716ab818301fe28068754c879e568c44e62f8Chris Lattner // 0xA0A0A0A0 and 0.0. 712de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *V = SI->getOperand(0); 713de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Value *ByteVal = isBytewiseValue(V)) { 71467a716ab818301fe28068754c879e568c44e62f8Chris Lattner if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), 71567a716ab818301fe28068754c879e568c44e62f8Chris Lattner ByteVal)) { 716f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BBI = I->getIterator(); // Don't invalidate iterator. 71767a716ab818301fe28068754c879e568c44e62f8Chris Lattner return true; 71867a716ab818301fe28068754c879e568c44e62f8Chris Lattner } 719a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 720de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we have an aggregate, we try to promote it to memset regardless 721de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // of opportunity for merging as it can expose optimization opportunities 722de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // in subsequent passes. 723de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *T = V->getType(); 724de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (T->isAggregateType()) { 725de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar uint64_t Size = DL.getTypeStoreSize(T); 726de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Align = SI->getAlignment(); 727de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Align) 728de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Align = DL.getABITypeAlignment(T); 729de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar IRBuilder<> Builder(SI); 730de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, 731de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Size, Align, SI->isVolatile()); 732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 733de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); 734de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 735de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MD->removeInstruction(SI); 736de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SI->eraseFromParent(); 737de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NumMemSetInfer++; 738de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 739de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Make sure we do not invalidate the iterator. 740de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BBI = M->getIterator(); 741de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 742de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 743de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 744de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 74567a716ab818301fe28068754c879e568c44e62f8Chris Lattner return false; 746a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 747a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 748de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { 749d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner // See if there is another memset or store neighboring this memset which 750d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner // allows us to widen out the memset to do a single larger store. 7510468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) 7520468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), 7530468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner MSI->getValue())) { 754f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BBI = I->getIterator(); // Don't invalidate iterator. 7550468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner return true; 7560468e3e2654cdd0ede16efa025161ce39785eb8dChris Lattner } 757d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner return false; 758d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner} 759d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner 760a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 761f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Takes a memcpy and a call that it depends on, 762a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// and checks for the possibility of a call slot optimization by having 763a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson/// the call write its result directly into the destination of the memcpy. 764de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, 765de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *cpySrc, uint64_t cpyLen, 766de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned cpyAlign, CallInst *C) { 767a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The general transformation to keep in mind is 768a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 769a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // call @func(..., src, ...) 770a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy(dest, src, ...) 771a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 772a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // -> 773a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 774a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy(dest, src, ...) 775a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // call @func(..., dest, ...) 776a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // 777a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since moving the memcpy is technically awkward, we additionally check that 778a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // src only holds uninitialized values at the moment of the call, meaning that 779a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the memcpy can be discarded rather than moved. 780a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 781de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Lifetime marks shouldn't be operated on. 782de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Function *F = C->getCalledFunction()) 783de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) 784de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 785de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 786a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Deliberately get the source and destination with bitcasts stripped away, 787a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // because we'll need to do type comparisons based on the underlying type. 7887d3056b16038a6a09c452c0dfcc3c8f4e421506aGabor Greif CallSite CS(C); 789a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 790a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Require that src be an alloca. This simplifies the reasoning considerably. 79161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); 792a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!srcAlloca) 793a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 794a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 79561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); 796a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!srcArraySize) 797a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 798a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 7994c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = cpy->getModule()->getDataLayout(); 8004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * 8014c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar srcArraySize->getZExtValue(); 802a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 8036549121c660dfd18361cd3daf6c766bee80d3097Owen Anderson if (cpyLen < srcSize) 804a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 805a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 806a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that accessing the first srcSize bytes of dest will not cause a 807a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // trap. Otherwise the transform is invalid since it might cause a trap 808a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // to occur earlier than it otherwise would. 80961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { 810a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // The destination is an alloca. Check it is larger than srcSize. 81161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); 812a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!destArraySize) 813a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 814a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 8154c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) * 8164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar destArraySize->getZExtValue(); 817a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 818a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (destSize < srcSize) 819a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 82061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { 821de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // The store to dest may never happen if the call can throw. 822de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (C->mayThrow()) 823de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 824de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 82537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (A->getDereferenceableBytes() < srcSize) { 82637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // If the destination is an sret parameter then only accesses that are 82737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // outside of the returned struct type can trap. 82837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!A->hasStructRetAttr()) 82937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 830a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 83137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Type *StructTy = cast<PointerType>(A->getType())->getElementType(); 83237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!StructTy->isSized()) { 83337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The call may never return and hence the copy-instruction may never 83437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // be executed, and therefore it's not safe to say "the destination 83537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // has at least <cpyLen> bytes, as implied by the copy-instruction", 83637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 83737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 8389792b646c68d0dcee4049662091f1496b4c85ce7Shuxin Yang 8394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar uint64_t destSize = DL.getTypeAllocSize(StructTy); 84037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (destSize < srcSize) 84137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 84237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 843a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } else { 844a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 845a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 846a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 8473372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands // Check that dest points to memory that is at least as aligned as src. 8483372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands unsigned srcAlign = srcAlloca->getAlignment(); 8493372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands if (!srcAlign) 8504c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType()); 8513372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands bool isDestSufficientlyAligned = srcAlign <= cpyAlign; 8523372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands // If dest is not aligned enough and we can't increase its alignment then 8533372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands // bail out. 8543372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) 8553372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands return false; 8563372c5a50fba4e4d277ba627a6b711e3df5f493dDuncan Sands 857a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Check that src is not accessed except via the call and the memcpy. This 858a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // guarantees that it holds only undefined values when passed in (so the final 859a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // memcpy can be dropped), that it is not read or written between the call and 860a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the memcpy, and that writing beyond the end of it is undefined. 86136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(), 86236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines srcAlloca->user_end()); 863a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson while (!srcUseList.empty()) { 86436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines User *U = srcUseList.pop_back_val(); 865a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 86636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { 86736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *UU : U->users()) 86836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines srcUseList.push_back(UU); 86937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 87037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 87137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { 87237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!G->hasAllZeroIndices()) 873009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson return false; 87437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 87537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (User *UU : U->users()) 87637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines srcUseList.push_back(UU); 87737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 878a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 87937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) 88037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (IT->getIntrinsicID() == Intrinsic::lifetime_start || 88137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines IT->getIntrinsicID() == Intrinsic::lifetime_end) 88237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 88337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 88437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (U != C && U != cpy) 88537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 886a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 887a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 88837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check that src isn't captured by the called function since the 88937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // transformation can cause aliasing issues in that case. 89037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 89137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i)) 89237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 89337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 894a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Since we're changing the parameter to the callsite, we need to make sure 895a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // that what would be the new parameter dominates the callsite. 896de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DominatorTree &DT = LookupDomTree(); 89761c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) 898a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson if (!DT.dominates(cpyDestInst, C)) 899a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 900a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 901a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // In addition to knowing that the call does not access src in some 902a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // unexpected manner, for example via a global, which we deduce from 903a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // the use analysis, we also need to know that it does not sneakily 904a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // access dest. We rely on AA to figure this out for us. 905de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AliasAnalysis &AA = LookupAliasAnalysis(); 906f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize); 9073a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier // If necessary, perform additional analysis. 908f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (MR != MRI_NoModRef) 9093a884f5c17ac32e34e7e62b4602a0d73eeda1ce8Chad Rosier MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT); 910f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (MR != MRI_NoModRef) 911a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 912a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 913a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // All the checks have passed, so do the transformation. 91412cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson bool changedArgument = false; 915a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson for (unsigned i = 0; i < CS.arg_size(); ++i) 916009e4f760969e3530cc2641a9599e646a20580c2Owen Anderson if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { 9177508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest 9187508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), 9197508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands cpyDest->getName(), C); 92012cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson changedArgument = true; 9217508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands if (CS.getArgument(i)->getType() == Dest->getType()) 9227508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands CS.setArgument(i, Dest); 92361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner else 9247508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands CS.setArgument(i, CastInst::CreatePointerCast(Dest, 9257508f946bc4b5022cc4612c8c7492f2e23043976Duncan Sands CS.getArgument(i)->getType(), Dest->getName(), C)); 926a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 927a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 92812cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson if (!changedArgument) 92912cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson return false; 93012cb36c11564e2a7cf85b4b29bddab5c5fd63cf5Owen Anderson 931f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands // If the destination wasn't sufficiently aligned then increase its alignment. 932f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands if (!isDestSufficientlyAligned) { 933f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!"); 934f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); 935f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands } 936f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands 937a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // Drop any cached information about the call, because we may have changed 938a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // its dependence information by changing its parameter. 9392f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(C); 940a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 941ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Update AA metadata 942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be 943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // handled here, but combineMetadata doesn't support them yet 944f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, 945f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar LLVMContext::MD_noalias, 946f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar LLVMContext::MD_invariant_group}; 947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines combineMetadata(C, cpy, KnownIDs); 948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 9492f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Remove the memcpy. 9502f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(cpy); 951fe60104ac97f3a8736dcfbfdf9547c7b7cc7b951Dan Gohman ++NumMemCpyInstr; 952a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 953a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return true; 954a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 955a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 956f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// We've found that the (upward scanning) memory dependence of memcpy 'M' is 957f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can. 958de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, 959de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemCpyInst *MDep) { 960a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // We can only transforms memcpy's where the dest of one is the source of the 96143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // other. 9622f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (M->getSource() != MDep->getDest() || MDep->isVolatile()) 963a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson return false; 964a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 965f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // If dep instruction is reading from our current input, then it is a noop 966f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // transfer and substituting the input won't change this instruction. Just 967f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // ignore the input and let someone else zap MDep. This handles cases like: 968f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // memcpy(a <- a) 969f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner // memcpy(b <- a) 970f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner if (M->getSource() == MDep->getSource()) 971f7f35467a9aac818bd5813c17e80d7efb66dadd7Chris Lattner return false; 972a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 9737a2bdde0a0eebcd2125055e0eacaca040f0b766cChris Lattner // Second, the length of the memcpy's must be the same, or the preceding one 974a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson // must be larger than the following one. 9758fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); 9768fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); 9778fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) 9788fb25c53bdc22a1f480ac0a6c0215a23f397deb3Dan Gohman return false; 979a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 980de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AliasAnalysis &AA = LookupAliasAnalysis(); 981604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner 982604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Verify that the copied-from memory doesn't change in between the two 983604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // transfers. For example, in: 984604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // memcpy(a <- b) 985604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // *b = 42; 986604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // memcpy(c <- a) 987604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // It would be invalid to transform the second memcpy into memcpy(c <- b). 988604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 989604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // TODO: If the code between M and MDep is transparent to the destination "c", 990604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // then we could still perform the xform by moving M up to the first memcpy. 991604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 992604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // NOTE: This is conservative, it will stop on any read from the source loc, 993604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // not just the defining memcpy. 994f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemDepResult SourceDep = 995f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, 996f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar M->getIterator(), M->getParent()); 997604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 998604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner return false; 999a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 10005a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // If the dest of the second might alias the source of the first, then the 10015a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // source and dest might overlap. We still want to eliminate the intermediate 10025a7aeaa01904b9b0adf256108f302f8961295754Chris Lattner // value, but we have to generate a memmove instead of memcpy. 100361db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner bool UseMemMove = false; 10046948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (!AA.isNoAlias(MemoryLocation::getForDest(M), 10056948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MemoryLocation::getForSource(MDep))) 100661db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner UseMemMove = true; 1007a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 10082f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // If all checks passed, then we can transform M. 1009a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 101004fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // Make sure to use the lesser of the alignment of the source and the dest 101104fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // since we're changing where we're reading from, but don't want to increase 101204fcbf954fef6d9866b5120f406e7401dc9aa29fEric Christopher // the alignment past what can be read from or written to. 1013c69a00047013a0e2e07ae44c38e013a7d905b10eEric Christopher // TODO: Is this worth it if we're creating a less aligned memcpy? For 1014c69a00047013a0e2e07ae44c38e013a7d905b10eEric Christopher // example we could be moving from movaps -> movq on x86. 1015d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); 1016a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 101761db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner IRBuilder<> Builder(M); 101861db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner if (UseMemMove) 101961db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), 102061db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Align, M->isVolatile()); 102161db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner else 102261db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), 102361db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner Align, M->isVolatile()); 1024d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner 1025604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Remove the instruction we're replacing. 10262f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(M); 1027d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner M->eraseFromParent(); 1028d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner ++NumMemCpyInstr; 1029d528be6636539567194981a8c0f8b90220bec0a5Chris Lattner return true; 1030a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 1031a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 10326948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// We've found that the (upward scanning) memory dependence of \p MemCpy is 10336948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that 10346948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// weren't copied over by \p MemCpy. 10356948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// 10366948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// In other words, transform: 10376948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \code 10386948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memset(dst, c, dst_size); 10396948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memcpy(dst, src, src_size); 10406948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \endcode 10416948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// into: 10426948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \code 10436948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memcpy(dst, src, src_size); 10446948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size); 10456948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \endcode 1046de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, 1047de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemSetInst *MemSet) { 10486948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // We can only transform memset/memcpy with the same destination. 10496948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (MemSet->getDest() != MemCpy->getDest()) 10506948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return false; 10516948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10526948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Check that there are no other dependencies on the memset destination. 1053f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemDepResult DstDepInfo = 1054f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false, 1055f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemCpy->getIterator(), MemCpy->getParent()); 10566948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (DstDepInfo.getInst() != MemSet) 10576948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return false; 10586948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10596948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Use the same i8* dest as the memcpy, killing the memset dest if different. 10606948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Value *Dest = MemCpy->getRawDest(); 10616948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Value *DestSize = MemSet->getLength(); 10626948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Value *SrcSize = MemCpy->getLength(); 10636948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10646948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // By default, create an unaligned memset. 10656948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar unsigned Align = 1; 10666948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // If Dest is aligned, and SrcSize is constant, use the minimum alignment 10676948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // of the sum. 10686948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar const unsigned DestAlign = 10696948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar std::max(MemSet->getAlignment(), MemCpy->getAlignment()); 10706948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (DestAlign > 1) 10716948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize)) 10726948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); 10736948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10746948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar IRBuilder<> Builder(MemCpy); 10756948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10766948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // If the sizes have different types, zext the smaller one. 10776948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (DestSize->getType() != SrcSize->getType()) { 10786948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (DestSize->getType()->getIntegerBitWidth() > 10796948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar SrcSize->getType()->getIntegerBitWidth()) 10806948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType()); 10816948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar else 10826948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar DestSize = Builder.CreateZExt(DestSize, SrcSize->getType()); 10836948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar } 10846948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10856948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Value *MemsetLen = 10866948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize), 10876948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar ConstantInt::getNullValue(DestSize->getType()), 10886948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Builder.CreateSub(DestSize, SrcSize)); 10896948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1), 10906948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MemsetLen, Align); 10916948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10926948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MD->removeInstruction(MemSet); 10936948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MemSet->eraseFromParent(); 10946948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return true; 10956948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar} 10966948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 10976948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// Transform memcpy to memset when its source was just memset. 10986948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// In other words, turn: 10996948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \code 11006948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memset(dst1, c, dst1_size); 11016948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memcpy(dst2, dst1, dst2_size); 11026948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \endcode 11036948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// into: 11046948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \code 11056948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memset(dst1, c, dst1_size); 11066948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// memset(dst2, c, dst2_size); 11076948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// \endcode 11086948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// When dst2_size <= dst1_size. 11096948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// 11106948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar/// The \p MemCpy must have a Constant length. 1111de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, 1112de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MemSetInst *MemSet) { 11136948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // This only makes sense on memcpy(..., memset(...), ...). 11146948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (MemSet->getRawDest() != MemCpy->getRawSource()) 11156948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return false; 11166948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 11176948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength()); 11186948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength()); 11196948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Make sure the memcpy doesn't read any more than what the memset wrote. 11206948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Don't worry about sizes larger than i64. 11216948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue()) 11226948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return false; 11236948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 11246948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar IRBuilder<> Builder(MemCpy); 11256948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), 11266948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar CopySize, MemCpy->getAlignment()); 11276948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return true; 11286948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar} 112943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 1130f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Perform simplification of memcpy's. If we have memcpy A 113143f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite 113243f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// B to be a memcpy from X to Z (or potentially a memmove, depending on 113343f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// circumstances). This allows later passes to remove the first memcpy 113443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner/// altogether. 1135de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processMemCpy(MemCpyInst *M) { 113636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We can only optimize non-volatile memcpy's. 113736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (M->isVolatile()) return false; 113843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 11398fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner // If the source and destination of the memcpy are the same, then zap it. 11408fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner if (M->getSource() == M->getDest()) { 11418fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner MD->removeInstruction(M); 11428fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner M->eraseFromParent(); 11438fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner return false; 11448fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner } 1145a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer 1146a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer // If copying from a constant, try to turn the memcpy into a memset. 114749c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) 11483fed0d917d23f4151cb8d98f122edd3229daf6abBenjamin Kramer if (GV->isConstant() && GV->hasDefinitiveInitializer()) 114949c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { 115061db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner IRBuilder<> Builder(M); 115136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), 115261db1f56d0b717d67557bbb2a9d83af1449458cbChris Lattner M->getAlignment(), false); 115349c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer MD->removeInstruction(M); 115449c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer M->eraseFromParent(); 115549c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer ++NumCpyToSet; 115649c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer return true; 115749c7e3e290e4633971cbeac996d8cffbe2aedc1dBenjamin Kramer } 1158a112087e4298ca8ec1bc8aef8a2b272e49faa7acBenjamin Kramer 11596948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MemDepResult DepInfo = MD->getDependency(M); 11606948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 11616948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Try to turn a partially redundant memset + memcpy into 11626948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // memcpy + smaller memset. We don't need the memcpy size for this. 11636948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (DepInfo.isClobber()) 11646948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) 11656948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (processMemSetMemCpyDependence(M, MDep)) 11666948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return true; 11676948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 116836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // The optimizations after this point require the memcpy size. 116936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); 1170dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!CopySize) return false; 117136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 11726948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // There are four possible optimizations we can do for memcpy: 117343f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // a) memcpy-memcpy xform which exposes redundance for DSE. 117443f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner // b) call-memcpy xform for return slot optimization. 117536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // c) memcpy from freshly alloca'd space or space that has just started its 117636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // lifetime copies undefined data, and we can therefore eliminate the 117736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // memcpy in favor of the data that was already at the destination. 11786948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // d) memcpy from a just-memset'd source can be turned into memset. 117936c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (DepInfo.isClobber()) { 118036c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { 118136c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (performCallSlotOptzn(M, M->getDest(), M->getSource(), 1182f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands CopySize->getZExtValue(), M->getAlignment(), 1183f58747517cf2ba55c6f89a5ddc4de63be9e1362fDuncan Sands C)) { 118436c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky MD->removeInstruction(M); 118536c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky M->eraseFromParent(); 118636c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky return true; 118736c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky } 11888fdca6a8738c1ad7091137688ee48c9e623b75bbChris Lattner } 118943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner } 1190b83a67e1e3fe210bd99a82eccd3dc5b1b44f1503Ahmed Charles 1191f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemoryLocation SrcLoc = MemoryLocation::getForSource(M); 1192f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( 1193f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SrcLoc, true, M->getIterator(), M->getParent()); 11946948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 119536c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (SrcDepInfo.isClobber()) { 119636c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) 11976948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return processMemCpyMemCpyDependence(M, MDep); 119836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (SrcDepInfo.isDef()) { 119936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Instruction *I = SrcDepInfo.getInst(); 120036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool hasUndefContents = false; 120136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 120236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isa<AllocaInst>(I)) { 120336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines hasUndefContents = true; 120436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 120536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (II->getIntrinsicID() == Intrinsic::lifetime_start) 120636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) 120736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (LTSize->getZExtValue() >= CopySize->getZExtValue()) 120836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines hasUndefContents = true; 120936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 121036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 121136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (hasUndefContents) { 121236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MD->removeInstruction(M); 121336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines M->eraseFromParent(); 121436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumMemCpyInstr; 121536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 121636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 121736c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky } 121836c7e6c36cce7896b762e79a75b9a29e6a39d48cNick Lewycky 12196948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (SrcDepInfo.isClobber()) 12206948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) 12216948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (performMemCpyToMemSetOptzn(M, MDep)) { 12226948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MD->removeInstruction(M); 12236948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar M->eraseFromParent(); 12246948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar ++NumCpyToSet; 12256948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return true; 12266948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar } 12276948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 122843f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner return false; 122943f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner} 123043f8e43eb2a166f50c3a077040d8bdb24104433aChris Lattner 1231f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed 1232f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// not to alias. 1233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processMemMove(MemMoveInst *M) { 1234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AliasAnalysis &AA = LookupAliasAnalysis(); 1235f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner 1236149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner if (!TLI->has(LibFunc::memmove)) 1237149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner return false; 1238a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1239f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // See if the pointers alias. 12406948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (!AA.isNoAlias(MemoryLocation::getForDest(M), 12416948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar MemoryLocation::getForSource(M))) 1242f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner return false; 1243a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1244de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M 1245de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << "\n"); 1246a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1247f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // If not, then we know we can transform this. 12485fdd6c8793462549e3593890ec61573da06e3346Jay Foad Type *ArgTys[3] = { M->getRawDest()->getType(), 12495fdd6c8793462549e3593890ec61573da06e3346Jay Foad M->getRawSource()->getType(), 12505fdd6c8793462549e3593890ec61573da06e3346Jay Foad M->getLength()->getType() }; 1251f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(), 1252f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Intrinsic::memcpy, ArgTys)); 125305cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands 1254f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // MemDep may have over conservative information about this instruction, just 1255f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner // conservatively flush it from the cache. 12562f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MD->removeInstruction(M); 125705cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands 125805cd03b33559732f8ed55e5ff7554fd06d59eb6aDuncan Sands ++NumMoveToCpy; 1259f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner return true; 1260f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner} 1261a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1262f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// This is called on every byval argument in call sites. 1263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { 12644c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); 1265604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // Find out what feeds this byval argument. 12662f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner Value *ByValArg = CS.getArgument(ArgNo); 1267865703e53afb258805c9e35ea3cb346fa7eff115Nick Lewycky Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); 12684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); 1269f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemDepResult DepInfo = MD->getPointerDependencyFrom( 1270f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemoryLocation(ByValArg, ByValSize), true, 1271f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar CS.getInstruction()->getIterator(), CS.getInstruction()->getParent()); 12722f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (!DepInfo.isClobber()) 12732f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 12742f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 12752f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by 12762f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // a memcpy, see if we can byval from the source of the memcpy instead of the 12772f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // result. 12782f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); 1279dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!MDep || MDep->isVolatile() || 12802f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ByValArg->stripPointerCasts() != MDep->getDest()) 12812f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 1282a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 12832f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // The length of the memcpy must be larger or equal to the size of the byval. 12842f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); 1285dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!C1 || C1->getValue().getZExtValue() < ByValSize) 12862f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return false; 12872f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 1288b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // Get the alignment of the byval. If the call doesn't specify the alignment, 1289b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // then it is some target specific value that we can't know. 12902f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); 1291b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner if (ByValAlign == 0) return false; 1292a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1293b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // If it is greater than the memcpy, then we check to see if we can force the 1294b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner // source of the memcpy to the alignment we need. If we fail, we bail out. 1295de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AssumptionCache &AC = LookupAssumptionCache(); 1296de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DominatorTree &DT = LookupDomTree(); 1297b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner if (MDep->getAlignment() < ByValAlign && 12984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, 12994c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar CS.getInstruction(), &AC, &DT) < ByValAlign) 1300b3f0673d52b72f34434dec13c4e2044c82012ef6Chris Lattner return false; 1301a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 13022f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Verify that the copied-from memory doesn't change in between the memcpy and 13032f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // the byval call. 13042f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // memcpy(a <- b) 13052f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // *b = 42; 13062f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // foo(*a) 13072f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // It would be invalid to transform the second memcpy into foo(*b). 1308604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // 1309604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // NOTE: This is conservative, it will stop on any read from the source loc, 1310604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner // not just the defining memcpy. 1311f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemDepResult SourceDep = MD->getPointerDependencyFrom( 1312f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemoryLocation::getForSource(MDep), false, 1313f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar CS.getInstruction()->getIterator(), MDep->getParent()); 1314604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 1315604f6fe553eb430c6d991f72baa3633842759b49Chris Lattner return false; 1316a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 13172f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner Value *TmpCast = MDep->getSource(); 13182f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (MDep->getSource()->getType() != ByValArg->getType()) 13192f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), 13202f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner "tmpcast", CS.getInstruction()); 1321a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" 13232f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner << " " << *MDep << "\n" 13242f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner << " " << *CS.getInstruction() << "\n"); 1325a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 13262f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Otherwise we're good! Update the byval argument. 13272f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner CS.setArgument(ArgNo, TmpCast); 13282f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner ++NumMemCpyInstr; 13292f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner return true; 13302f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner} 13312f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 1332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// Executes one iteration of MemCpyOptPass. 1333de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::iterateOnFunction(Function &F) { 133461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool MadeChange = false; 1335a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson 133661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner // Walk all instruction in the function. 1337de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (BasicBlock &BB : F) { 1338de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { 133961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner // Avoid invalidating the iterator. 1340f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *I = &*BI++; 1341a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 13422f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner bool RepeatInstruction = false; 1343a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1344a8bd65835be9e1ce07f5006e92625ec4e9fa387aOwen Anderson if (StoreInst *SI = dyn_cast<StoreInst>(I)) 134561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner MadeChange |= processStore(SI, BI); 1346d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) 1347d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner RepeatInstruction = processMemSet(M, BI); 1348d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) 13492f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner RepeatInstruction = processMemCpy(M); 1350d90a192279c8580c4e80d1ee102d1317ce2aaffaChris Lattner else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) 13512f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner RepeatInstruction = processMemMove(M); 13520c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar else if (auto CS = CallSite(I)) { 13532f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 1354173862e5468fbcf4b022b9088d2c81b25c2d60c5Nick Lewycky if (CS.isByValArgument(i)) 13552f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MadeChange |= processByValArgument(CS, i); 13562f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner } 13572f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner 13582f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner // Reprocess the instruction if desired. 13592f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner if (RepeatInstruction) { 1360de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (BI != BB.begin()) 1361de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar --BI; 13622f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3Chris Lattner MadeChange = true; 1363f41eaacee4a4a2d4339dd553626d98c73650c8c7Chris Lattner } 1364a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 1365a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson } 1366a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 136761c6ba85715fdcb66f746678879984151f1e5485Chris Lattner return MadeChange; 1368a723d1e48f4a261512c28845c53eda569fa5218cOwen Anderson} 136961c6ba85715fdcb66f746678879984151f1e5485Chris Lattner 1370de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarPreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { 137136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto &MD = AM.getResult<MemoryDependenceAnalysis>(F); 1373de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 1374de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto LookupAliasAnalysis = [&]() -> AliasAnalysis & { 1376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AM.getResult<AAManager>(F); 1377de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }; 1378de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto LookupAssumptionCache = [&]() -> AssumptionCache & { 1379de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AM.getResult<AssumptionAnalysis>(F); 1380de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }; 1381de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto LookupDomTree = [&]() -> DominatorTree & { 1382de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AM.getResult<DominatorTreeAnalysis>(F); 1383de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }; 1384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis, 1386de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LookupAssumptionCache, LookupDomTree); 1387de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MadeChange) 1388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return PreservedAnalyses::all(); 1389de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PreservedAnalyses PA; 1390de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PA.preserve<GlobalsAA>(); 1391de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PA.preserve<MemoryDependenceAnalysis>(); 1392de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return PA; 1393de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1394de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1395de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptPass::runImpl( 1396de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, 1397de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar std::function<AliasAnalysis &()> LookupAliasAnalysis_, 1398de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar std::function<AssumptionCache &()> LookupAssumptionCache_, 1399de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar std::function<DominatorTree &()> LookupDomTree_) { 140061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner bool MadeChange = false; 1401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MD = MD_; 1402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TLI = TLI_; 1403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LookupAliasAnalysis = std::move(LookupAliasAnalysis_); 1404de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LookupAssumptionCache = std::move(LookupAssumptionCache_); 1405de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LookupDomTree = std::move(LookupDomTree_); 1406a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1407149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // If we don't have at least memset and memcpy, there is little point of doing 1408149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // anything here. These are required by a freestanding implementation, so if 1409149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner // even they are disabled, there is no point in trying hard. 1410149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) 1411149f5283f93ec85b96888c284f56099a72cc2731Chris Lattner return false; 1412a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 141361c6ba85715fdcb66f746678879984151f1e5485Chris Lattner while (1) { 141461c6ba85715fdcb66f746678879984151f1e5485Chris Lattner if (!iterateOnFunction(F)) 141561c6ba85715fdcb66f746678879984151f1e5485Chris Lattner break; 141661c6ba85715fdcb66f746678879984151f1e5485Chris Lattner MadeChange = true; 141761c6ba85715fdcb66f746678879984151f1e5485Chris Lattner } 1418a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem 1419dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MD = nullptr; 142061c6ba85715fdcb66f746678879984151f1e5485Chris Lattner return MadeChange; 142161c6ba85715fdcb66f746678879984151f1e5485Chris Lattner} 1422de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1423de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// This is the main transformation entry point for a function. 1424de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool MemCpyOptLegacyPass::runOnFunction(Function &F) { 1425de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (skipFunction(F)) 1426de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1427de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1428de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 1429de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 1430de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1431de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto LookupAliasAnalysis = [this]() -> AliasAnalysis & { 1432de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return getAnalysis<AAResultsWrapperPass>().getAAResults(); 1433de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }; 1434de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & { 1435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 1436de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }; 1437de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto LookupDomTree = [this]() -> DominatorTree & { 1438de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1439de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar }; 1440de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1441de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache, 1442de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LookupDomTree); 1443de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1444