// MemCpyOptimizer.cpp revision ac53a0b272452013124bfc70480aea5e41b60f40
//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memset's.
//
//===----------------------------------------------------------------------===//
157c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner#define DEBUG_TYPE "memcpyopt"
167c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner#include "llvm/Transforms/Scalar.h"
177c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner#include "llvm/IntrinsicInst.h"
187c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner#include "llvm/Instructions.h"
190bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner#include "llvm/LLVMContext.h"
202668959b8879097db368aec7d76c455260abc75bChris Lattner#include "llvm/ADT/SmallVector.h"
21331d1bc5dfe1be9090e29f9af9579888a63a9a79Chris Lattner#include "llvm/ADT/Statistic.h"
227c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner#include "llvm/Analysis/Dominators.h"
237c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner#include "llvm/Analysis/AliasAnalysis.h"
240bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner#include "llvm/Analysis/MemoryDependenceAnalysis.h"
250bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner#include "llvm/Support/Debug.h"
263c983c3dc19bb83807f978c04737b4572be90a93Nate Begeman#include "llvm/Support/GetElementPtrTypeIterator.h"
270ba2bcfcc3149a25d08aa8aa00fb6c34a4e25bddDan Gohman#include "llvm/Support/raw_ostream.h"
280bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner#include "llvm/Target/TargetData.h"
290bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner#include <list>
300bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattnerusing namespace llvm;
310bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner
32f76053269ecc6c7bd3d0b1e90ebdd0cef1bb2bdcChris LattnerSTATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
33c09eeec0ebc378644bafd04916e5efafa7d98152Nate BegemanSTATISTIC(NumMemSetInfer, "Number of memsets inferred");
34c09eeec0ebc378644bafd04916e5efafa7d98152Nate BegemanSTATISTIC(NumMoveToCpy,   "Number of memmoves converted to memcpy");
35c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begeman
36c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begeman/// isBytewiseValue - If the specified value can be set by repeating the same
37c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begeman/// byte in memory, return the i8 value that it is represented with.  This is
38c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begeman/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
39c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begeman/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
40c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begeman/// byte store (e.g. i16 0x1234), return null.
41c09eeec0ebc378644bafd04916e5efafa7d98152Nate Begemanstatic Value *isBytewiseValue(Value *V) {
42860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner  LLVMContext &Context = V->getContext();
435126984b1da4bda0e93961da07e883699f1f2d57Chris Lattner
445126984b1da4bda0e93961da07e883699f1f2d57Chris Lattner  // All byte-wide stores are splatable, even of arbitrary variables.
455126984b1da4bda0e93961da07e883699f1f2d57Chris Lattner  if (V->getType() == Type::getInt8Ty(Context)) return V;
465126984b1da4bda0e93961da07e883699f1f2d57Chris Lattner
475126984b1da4bda0e93961da07e883699f1f2d57Chris Lattner  // Constant float and double values can be handled as integer values if the
48993aeb2ed93f99faf1438f1b67cd922989306828Nate Begeman  // corresponding integer value is "byteable".  An important case is 0.0.
49993aeb2ed93f99faf1438f1b67cd922989306828Nate Begeman  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
50993aeb2ed93f99faf1438f1b67cd922989306828Nate Begeman    if (CFP->getType()->isFloatTy())
51993aeb2ed93f99faf1438f1b67cd922989306828Nate Begeman      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
52f1d0b2bedaa065972a5ba17259055c1176cd1497Chris Lattner    if (CFP->getType()->isDoubleTy())
53f1d0b2bedaa065972a5ba17259055c1176cd1497Chris Lattner      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
54f1d0b2bedaa065972a5ba17259055c1176cd1497Chris Lattner    // Don't handle long double formats, which have strange constraints.
55f1d0b2bedaa065972a5ba17259055c1176cd1497Chris Lattner  }
56860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner
57860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner  // We can handle constant integers that are power of two in size and a
58860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner  // multiple of 8 bits.
59860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
60860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner    unsigned Width = CI->getBitWidth();
61860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner    if (isPowerOf2_32(Width) && Width > 8) {
62860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner      // We can handle this value if the recursive binary decomposition is the
636b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller      // same at all levels.
646b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller      APInt Val = CI->getValue();
652f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey      APInt Val2;
662f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey      while (Val.getBitWidth() != 8) {
672f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey        unsigned NextWidth = Val.getBitWidth()/2;
682f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey        Val2  = Val.lshr(NextWidth);
692f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey        Val2.trunc(Val.getBitWidth()/2);
70860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner        Val.trunc(Val.getBitWidth()/2);
71860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner
72860e8862c1fbd3b261da4a64a8c0096f9f373681Chris Lattner        // If the top/bottom halves aren't the same, reject it.
734172b10ca1adfc1026428e5f522aaab98bd939adChris Lattner        if (Val != Val2)
744172b10ca1adfc1026428e5f522aaab98bd939adChris Lattner          return 0;
754172b10ca1adfc1026428e5f522aaab98bd939adChris Lattner      }
764172b10ca1adfc1026428e5f522aaab98bd939adChris Lattner      return ConstantInt::get(Context, Val);
774172b10ca1adfc1026428e5f522aaab98bd939adChris Lattner    }
78ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner  }
79ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner
80ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner  // Conceptually, we could handle things like:
81ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner  //   %a = zext i8 %X to i16
829e4dd9dfc97f3930f58ca6e47bebbd8eb5cdd8a1Nate Begeman  //   %b = shl i16 %a, 8
83ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner  //   %c = or i16 %a, %b
84ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner  // but until there is an example that actually needs this, it doesn't seem
85ecfe55e65b6a72fddd543c42f2e2df4c96c647baChris Lattner  // worth worrying about.
86c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner  return 0;
872a9ddfb903ae3baede7282348afae1f750905248Tilmann Scheller}
88281b55ebeccd3f0d723888c1bb9ec6e476f708f1Chris Lattner
896b16eff207f99bbde3c0f7340452a5287218772cTilmann Schellerstatic int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
906b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller                                  bool &VariableIdxFound, TargetData &TD) {
916b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller  // Skip over the first indices.
92c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner  gep_type_iterator GTI = gep_type_begin(GEP);
93c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner  for (unsigned i = 1; i != Idx; ++i, ++GTI)
94c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner    /*skip along*/;
95c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner
96c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner  // Compute the offset implied by the rest of the indices.
97c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner  int64_t Offset = 0;
982a9ddfb903ae3baede7282348afae1f750905248Tilmann Scheller  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
99c703a8fbf8653ac8302ae368391a4954c307ca2cChris Lattner    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
1009e4dd9dfc97f3930f58ca6e47bebbd8eb5cdd8a1Nate Begeman    if (OpC == 0)
1019e4dd9dfc97f3930f58ca6e47bebbd8eb5cdd8a1Nate Begeman      return VariableIdxFound = true;
1026d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner    if (OpC->isZero()) continue;  // No offset.
1036d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner
1046d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner    // Handle struct indices, which add their field offset to the pointer.
1056d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
1066d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
107a17b1557ad705c56c41624e6841e19093ed31f21Chris Lattner      continue;
108a17b1557ad705c56c41624e6841e19093ed31f21Chris Lattner    }
109a17b1557ad705c56c41624e6841e19093ed31f21Chris Lattner
110a17b1557ad705c56c41624e6841e19093ed31f21Chris Lattner    // Otherwise, we have a sequential type like an array or vector.  Multiply
111a17b1557ad705c56c41624e6841e19093ed31f21Chris Lattner    // the index by the ElementSize.
112a17b1557ad705c56c41624e6841e19093ed31f21Chris Lattner    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
1136d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner    Offset += Size*OpC->getSExtValue();
1146d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner  }
1156d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner
1166d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner  return Offset;
1176d92caddc4aa5fc946b294259e00cc35536e61e8Chris Lattner}
11890564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattner
11990564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattner/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
12090564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattner/// constant offset, and return that constant offset.  For example, Ptr1 might
12190564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattner/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
12290564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattnerstatic bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
12390564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattner                            TargetData &TD) {
12490564f26d17701e11effa2f4e0fb9a18d8a91274Chris Lattner  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
125d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  // base.  After that base, they may have some number of common (and
126d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  // potentially variable) indices.  After that they handle some constant
127d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  // offset, which determines their offset from each other.  At this point, we
128d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  // handle no other case.
129d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
130d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
131d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
132d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner    return false;
133d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner
134d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  // Skip any common indices and track the GEP types.
135d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  unsigned Idx = 1;
136d9989384592a3bd9dd374470a723ca8303071a2dChris Lattner  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
1376eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
1386eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen      break;
1396eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen
1406eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen  bool VariableIdxFound = false;
1416eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
1426eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
1436eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen  if (VariableIdxFound) return false;
1446eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen
1456eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen  Offset = Offset2-Offset1;
1466eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen  return true;
1476eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen}
1486eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen
1496eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen
1506eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
1516eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen/// This allows us to analyze stores like:
1526eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen///   store 0 -> P+1
1536eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen///   store 0 -> P+0
1546eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen///   store 0 -> P+3
1556eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen///   store 0 -> P+2
1566eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen/// which sometimes happens with stores to arrays of structs etc.  When we see
1576eaeff29b8a6990107735f7e5f5e49da38f56223Dale Johannesen/// the first store, we make a range [1, 2).  The second store extends the range
15854fc97dcdc0ab747f49bd09c5a877bfd2a00e364Evan Cheng/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
15954fc97dcdc0ab747f49bd09c5a877bfd2a00e364Evan Cheng/// two ranges into [0, 3) which is memset'able.
1608608f2eff2dab5345243c40d0bca9138f2dce6f1Evan Chengnamespace {
16154fc97dcdc0ab747f49bd09c5a877bfd2a00e364Evan Chengstruct MemsetRange {
1628608f2eff2dab5345243c40d0bca9138f2dce6f1Evan Cheng  // Start/End - A semi range that describes the span that this range covers.
16354fc97dcdc0ab747f49bd09c5a877bfd2a00e364Evan Cheng  // The range is closed at the start and open at the end: [Start, End).
1648608f2eff2dab5345243c40d0bca9138f2dce6f1Evan Cheng  int64_t Start, End;
1658608f2eff2dab5345243c40d0bca9138f2dce6f1Evan Cheng
1668608f2eff2dab5345243c40d0bca9138f2dce6f1Evan Cheng  /// StartPtr - The getelementptr instruction that points to the start of the
16754fc97dcdc0ab747f49bd09c5a877bfd2a00e364Evan Cheng  /// range.
16830e62c098b5841259f8026df1c5c45c7c1182a38Arnold Schwaighofer  Value *StartPtr;
16930e62c098b5841259f8026df1c5c45c7c1182a38Arnold Schwaighofer
17030e62c098b5841259f8026df1c5c45c7c1182a38Arnold Schwaighofer  /// Alignment - The known alignment of the first store.
17130e62c098b5841259f8026df1c5c45c7c1182a38Arnold Schwaighofer  unsigned Alignment;
17230e62c098b5841259f8026df1c5c45c7c1182a38Arnold Schwaighofer
17330e62c098b5841259f8026df1c5c45c7c1182a38Arnold Schwaighofer  /// TheStores - The actual stores that make up this range.
174281b55ebeccd3f0d723888c1bb9ec6e476f708f1Chris Lattner  SmallVector<StoreInst*, 16> TheStores;
1753c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner
1763c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner  bool isProfitableToUseMemset(const TargetData &TD) const;
1773c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner
1783c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner};
179ddb739e5ea6ccf6fa4f4e2a23e3da550868efaa1Chris Lattner} // end anon namespace
180ddb739e5ea6ccf6fa4f4e2a23e3da550868efaa1Chris Lattner
1819008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begemanbool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
182ddb739e5ea6ccf6fa4f4e2a23e3da550868efaa1Chris Lattner  // If we found more than 8 stores to merge or 64 bytes, use memset.
183ddb739e5ea6ccf6fa4f4e2a23e3da550868efaa1Chris Lattner  if (TheStores.size() >= 8 || End-Start >= 64) return true;
184ddb739e5ea6ccf6fa4f4e2a23e3da550868efaa1Chris Lattner
1859008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begeman  // Assume that the code generator is capable of merging pairs of stores
186116cc48e30b9c307bf3eec29c890b4ba25cd18dbChris Lattner  // together if it wants to.
187116cc48e30b9c307bf3eec29c890b4ba25cd18dbChris Lattner  if (TheStores.size() <= 2) return false;
188116cc48e30b9c307bf3eec29c890b4ba25cd18dbChris Lattner
1899008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begeman  // If we have fewer than 8 stores, it can still be worthwhile to do this.
1909008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begeman  // For example, merging 4 i8 stores into an i32 store is useful almost always.
191116cc48e30b9c307bf3eec29c890b4ba25cd18dbChris Lattner  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
192116cc48e30b9c307bf3eec29c890b4ba25cd18dbChris Lattner  // memset will be split into 2 32-bit stores anyway) and doing so can
193116cc48e30b9c307bf3eec29c890b4ba25cd18dbChris Lattner  // pessimize the llvm optimizer.
1949008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begeman  //
1959008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begeman  // Since we don't have perfect knowledge here, make some assumptions: assume
196ddb739e5ea6ccf6fa4f4e2a23e3da550868efaa1Chris Lattner  // the maximum GPR width is the same size as the pointer size and assume that
197d0608e191ff9c00af68985f246410c219d1bec57Chris Lattner  // this width can be stored.  If so, check to see whether we will end up
198d0608e191ff9c00af68985f246410c219d1bec57Chris Lattner  // actually reducing the number of stores used.
199f24380e78ecc8a2db1b2116867d878b1e7c6f6edChris Lattner  unsigned Bytes = unsigned(End-Start);
200d0608e191ff9c00af68985f246410c219d1bec57Chris Lattner  unsigned NumPointerStores = Bytes/TD.getPointerSize();
2013c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner
2023c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner  // Assume the remaining bytes if any are done a byte at a time.
2033c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
2049008ca6b6b4f638cfafccb593cbc5b1d3f5ab877Nate Begeman
2053c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner  // If we will reduce the # stores (according to this heuristic), do the
20666ffe6be0c7b50100a00cb0cc87a5d4983818572Evan Cheng  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
20766ffe6be0c7b50100a00cb0cc87a5d4983818572Evan Cheng  // etc.
20866ffe6be0c7b50100a00cb0cc87a5d4983818572Evan Cheng  return TheStores.size() > NumPointerStores+NumByteStores;
20966ffe6be0c7b50100a00cb0cc87a5d4983818572Evan Cheng}
2103c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner
2113c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattner
2127ff7e674580adad7a5bccdbd74cf9c9f05e46d0fChris Lattnernamespace {
21364b3a08bc696b2ef8733d72ce81e49be175cbbffChris Lattnerclass MemsetRanges {
214e87192a854ff0f2f1904dd9ea282eb36059bb5afChris Lattner  /// Ranges - A sorted list of the memset ranges.  We use std::list here
215140a58f9dfda30dbb80edd3da1b5632c178f7efcChris Lattner  /// because each element is relatively large and expensive to copy.
216140a58f9dfda30dbb80edd3da1b5632c178f7efcChris Lattner  std::list<MemsetRange> Ranges;
217140a58f9dfda30dbb80edd3da1b5632c178f7efcChris Lattner  typedef std::list<MemsetRange>::iterator range_iterator;
218475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  TargetData &TD;
2193c0f9cc90cdcb70caf0dc517b9f9206d731aeb70Chris Lattnerpublic:
2200bbea954331b8f08afa5b094dfb0841829c70eaaChris Lattner  MemsetRanges(TargetData &td) : TD(td) {}
22121e463b2bf864671a87ebe386cb100ef9349a540Nate Begeman
2227c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner  typedef std::list<MemsetRange>::const_iterator const_iterator;
2230111999a88077f237c49d03c5e7891ec874b33a9Nicolas Geoffray  const_iterator begin() const { return Ranges.begin(); }
2240111999a88077f237c49d03c5e7891ec874b33a9Nicolas Geoffray  const_iterator end() const { return Ranges.end(); }
2250111999a88077f237c49d03c5e7891ec874b33a9Nicolas Geoffray  bool empty() const { return Ranges.empty(); }
2260111999a88077f237c49d03c5e7891ec874b33a9Nicolas Geoffray
2270111999a88077f237c49d03c5e7891ec874b33a9Nicolas Geoffray  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
2280111999a88077f237c49d03c5e7891ec874b33a9Nicolas Geoffray};
229331d1bc5dfe1be9090e29f9af9579888a63a9a79Chris Lattner
2307c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner} // end anon namespace
23161e729e2e9517ab2d8887bab86fb377900fa1081Dan Gohman
2327c5a3d390a463fb50a6eee7ae3174817925e6d28Chris Lattner
233da6d20f0c15205923cb2c3ef4bf9b5d77de88881Chris Lattner/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
234da6d20f0c15205923cb2c3ef4bf9b5d77de88881Chris Lattner/// new range for the specified store at the specified offset, merging into
235da6d20f0c15205923cb2c3ef4bf9b5d77de88881Chris Lattner/// existing ranges as appropriate.
236fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattnervoid MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
2375b8f82e35b51bf007de07a7ca9347d804084ddf8Scott Michel  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
238825b72b0571821bf2d378749f69d6c4cfb52d2f9Owen Anderson
2395b8f82e35b51bf007de07a7ca9347d804084ddf8Scott Michel  // Do a linear search of the ranges to see if this can be joined and/or to
240fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // find the insertion point in the list.  We keep the ranges sorted for
241fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // simplicity here.  This is a linear search of a linked list, which is ugly,
242fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // however the number of ranges is limited, so this won't get crazy slow.
243475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  range_iterator I = Ranges.begin(), E = Ranges.end();
244475871a144eb604ddaf37503397ba0941442e5fbDan Gohman
245144d8f09e139f691cafadbc17873943ba4c465f3Evan Cheng  while (I != E && Start > I->End)
24673e0914848662404cf2aa18eb049ff5aae543388Dan Gohman    ++I;
247fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner
248fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // We now know that I == E, in which case we didn't find anything to merge
249fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
250fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // to insert a new range.  Handle this now.
251475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  if (I == E || End < I->Start) {
25273e0914848662404cf2aa18eb049ff5aae543388Dan Gohman    MemsetRange &R = *Ranges.insert(I, MemsetRange());
253fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner    R.Start        = Start;
254fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner    R.End          = End;
255fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner    R.StartPtr     = SI->getPointerOperand();
256fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner    R.Alignment    = SI->getAlignment();
257475871a144eb604ddaf37503397ba0941442e5fbDan Gohman    R.TheStores.push_back(SI);
25873e0914848662404cf2aa18eb049ff5aae543388Dan Gohman    return;
259fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  }
260fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner
261fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // This store overlaps with I, add it.
262475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  I->TheStores.push_back(SI);
26373e0914848662404cf2aa18eb049ff5aae543388Dan Gohman
264fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // At this point, we may have an interval that completely contains our store.
265fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // If so, just add it to the interval and return.
266fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  if (I->Start <= Start && I->End >= End)
267fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner    return;
268475871a144eb604ddaf37503397ba0941442e5fbDan Gohman
26973e0914848662404cf2aa18eb049ff5aae543388Dan Gohman  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
270fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // but is not entirely contained within the range.
271da6d20f0c15205923cb2c3ef4bf9b5d77de88881Chris Lattner
272e4bc9ea0a560d8a0ba42f5a2da617e1f1f834710Chris Lattner  // See if the range extends the start of the range.  In this case, it couldn't
273e4bc9ea0a560d8a0ba42f5a2da617e1f1f834710Chris Lattner  // possibly cause it to join the prior range, because otherwise we would have
274475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  // stopped on *it*.
2751f873003266fbdec7c2c48a965c60f4e2e35a158Chris Lattner  if (Start < I->Start) {
2761607f05cb7d77d01ce521a30232faa389dbed4e2Duncan Sands    I->Start = Start;
2771607f05cb7d77d01ce521a30232faa389dbed4e2Duncan Sands    I->StartPtr = SI->getPointerOperand();
2781607f05cb7d77d01ce521a30232faa389dbed4e2Duncan Sands    I->Alignment = SI->getAlignment();
2791607f05cb7d77d01ce521a30232faa389dbed4e2Duncan Sands  }
2801607f05cb7d77d01ce521a30232faa389dbed4e2Duncan Sands
2811607f05cb7d77d01ce521a30232faa389dbed4e2Duncan Sands  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
282475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  // is in or right at the end of I), and that End >= I->Start.  Extend I out to
283fc5b1ab94959879a91c34aee8859e652a50270d0Chris Lattner  // End.
284475871a144eb604ddaf37503397ba0941442e5fbDan Gohman  if (End > I->End) {
285977a76fbb6ea1b87dfd7fbbe2ae2afb63e982ff3Dan Gohman    I->End = End;
286fd29e0eb060ea8b4d490860329234d2ae5f5952eDan Gohman    range_iterator NextI = I;
287fd29e0eb060ea8b4d490860329234d2ae5f5952eDan Gohman    while (++NextI != E && End >= NextI->Start) {
288ea859be53ca13a1547c4675549946b74dc3c6f41Dan Gohman      // Merge the range in.
289bbe77de450ef36b4f83cc3b57705a9758adbd925Chris Lattner      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
2904a95945fa5aa431110f50092f4a45d24772a553bNate Begeman      if (NextI->End > I->End)
291ff9b373e8f5006c629af81e2619778b4c4f5249eEvan Cheng        I->End = NextI->End;
2921fdbc1dd4e9cb42c79a30e8dc308c322e923cc52Dan Gohman      Ranges.erase(NextI);
293bdab93a2ef5d9574bb4e322e020849f9bc9c90d7Dale Johannesen      NextI = I;
294bdab93a2ef5d9574bb4e322e020849f9bc9c90d7Dale Johannesen    }
2951fdbc1dd4e9cb42c79a30e8dc308c322e923cc52Dan Gohman  }
29697efa365869d3b7b62836434585360a232836f0eDale Johannesen}
29797efa365869d3b7b62836434585360a232836f0eDale Johannesen
2981fdbc1dd4e9cb42c79a30e8dc308c322e923cc52Dan Gohman//===----------------------------------------------------------------------===//
299ddc787dfdc75fb2d78eb3e5793ca0f417ad74fd3Chris Lattner//                         MemCpyOpt Pass
3004234f57fa02b1f04a9f52a7b3c2aa22d32ac521cChris Lattner//===----------------------------------------------------------------------===//
301331d1bc5dfe1be9090e29f9af9579888a63a9a79Chris Lattner
302331d1bc5dfe1be9090e29f9af9579888a63a9a79Chris Lattnernamespace {
303e50ed30282bb5b4a9ed952580523f2dda16215acOwen Anderson  class MemCpyOpt : public FunctionPass {
304c4c6257c1a154279bf10e9498d46d6c1793dbaa7Evan Cheng    bool runOnFunction(Function &F);
30528d08fdb9f6572cafd5aae95c7caffa3cd136d8eDale Johannesen  public:
30628d08fdb9f6572cafd5aae95c7caffa3cd136d8eDale Johannesen    static char ID; // Pass identification, replacement for typeid
30728d08fdb9f6572cafd5aae95c7caffa3cd136d8eDale Johannesen    MemCpyOpt() : FunctionPass(&ID) {}
30828d08fdb9f6572cafd5aae95c7caffa3cd136d8eDale Johannesen
30928d08fdb9f6572cafd5aae95c7caffa3cd136d8eDale Johannesen  private:
31048884cd80b52be1528618f2e9b3425ac24e7b5caChris Lattner    // This transformation requires dominator postdominator info
311da43bcf624acb56a3d77bb5ae9a02728af032613Evan Cheng    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
312da43bcf624acb56a3d77bb5ae9a02728af032613Evan Cheng      AU.setPreservesCFG();
313da43bcf624acb56a3d77bb5ae9a02728af032613Evan Cheng      AU.addRequired<DominatorTree>();
314475871a144eb604ddaf37503397ba0941442e5fbDan Gohman      AU.addRequired<MemoryDependenceAnalysis>();
31548884cd80b52be1528618f2e9b3425ac24e7b5caChris Lattner      AU.addRequired<AliasAnalysis>();
316da43bcf624acb56a3d77bb5ae9a02728af032613Evan Cheng      AU.addPreserved<AliasAnalysis>();
317475871a144eb604ddaf37503397ba0941442e5fbDan Gohman      AU.addPreserved<MemoryDependenceAnalysis>();
3185e764233f398b6929b67701672a5e78fec20ce2eChris Lattner    }
31948884cd80b52be1528618f2e9b3425ac24e7b5caChris Lattner
320c9addb74883fef318140272768422656a694341fChris Lattner    // Helper fuctions
321c9addb74883fef318140272768422656a694341fChris Lattner    bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
322c9addb74883fef318140272768422656a694341fChris Lattner    bool processMemCpy(MemCpyInst *M);
323c9addb74883fef318140272768422656a694341fChris Lattner    bool processMemMove(MemMoveInst *M);
324c4c6257c1a154279bf10e9498d46d6c1793dbaa7Evan Cheng    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
325861939152debbaa15a55a196a4321837c7bc379dEvan Cheng    bool iterateOnFunction(Function &F);
326861939152debbaa15a55a196a4321837c7bc379dEvan Cheng  };
327861939152debbaa15a55a196a4321837c7bc379dEvan Cheng
328861939152debbaa15a55a196a4321837c7bc379dEvan Cheng  char MemCpyOpt::ID = 0;
329861939152debbaa15a55a196a4321837c7bc379dEvan Cheng}
330861939152debbaa15a55a196a4321837c7bc379dEvan Cheng
331861939152debbaa15a55a196a4321837c7bc379dEvan Cheng// createMemCpyOptPass - The public interface to this file...
33243c6e7cd9b0d9a3b0006650ddfac256848f10d51Nicolas GeoffrayFunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
33398ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman
33498ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohmanstatic RegisterPass<MemCpyOpt> X("memcpyopt",
33565c3c8f323198b99b88b109654194540cf9b3fa5Sandeep Patel                                 "MemCpy Optimization");
33698ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman
33798ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman
33898ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
///
/// On success, BBI is repositioned so the caller's scan resumes after the
/// newly inserted memset rather than at an erased store.
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  // Volatile stores must be left exactly as written.
  if (SI->isVolatile()) return false;

  LLVMContext &Context = SI->getContext();

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  // Without TargetData we cannot reason about sizes/offsets, so bail.
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  Module *M = SI->getParent()->getParent()->getParent();

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(*TD);

  Value *StartPtr = SI->getPointerOperand();

  // Scan forward from the store to the end of the basic block.
  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite::get(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;

      // TODO: If this is a memset, try to join it in.

      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      // Any read of memory terminates the run: merging stores across it
      // could change the value it observes.
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;

    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;

    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);

  // Lazily created declaration of llvm.memset, shared by all ranges below.
  Function *MemSetF = 0;

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    // A range of one store is just the original store; nothing to merge.
    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(*TD))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right before the first instruction that isn't part of this
    // memset block.  This ensure that the memset is dominated by any addressing
    // instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;

    if (MemSetF == 0) {
      // i64 length operand selects the 64-bit memset intrinsic overload.
      const Type *Ty = Type::getInt64Ty(Context);
      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
    }

    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Cast the start ptr to be i8* as memset requires.
    const Type *i8Ptr = Type::getInt8PtrTy(Context);
    if (StartPtr->getType() != i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                 InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,   // Start, value
      // size
      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
      // align
      ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
    };
    Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
    // The self-assignment silences the unused-variable warning in NDEBUG
    // builds, where DEBUG() expands to nothing and C is otherwise unused.
    DEBUG(errs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            errs() << *Range.TheStores[i];
          errs() << "With: " << *C); C=C;

    // Don't invalidate the iterator
    BBI = BI;

    // Zap all the stores.
    for (SmallVector<StoreInst*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI)
      (*SI)->eraseFromParent();
    ++NumMemSetInfer;
    MadeChange = true;
  }

  return MadeChange;
}
478
479
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  Value *cpyDest = cpy->getDest();
  Value *cpySrc = cpy->getSource();
  CallSite CS = CallSite::get(C);

  // We need to be able to reason about the size of the memcpy, so we require
  // that it be a constant.
  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
  if (!cpyLength)
    return false;

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  // Total size of the source alloca in bytes: element size * array count.
  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD->getTypeAllocSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    // Any other kind of destination pointer could trap; give up.
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  // Worklist walk over the alloca's users; bitcasts and all-zero GEPs are
  // transparent and contribute their own users to the worklist.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User *UI = srcUseList.back();
    srcUseList.pop_back();

    if (isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
      if (G->hasAllZeroIndices())
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          srcUseList.push_back(*I);
      else
        return false;
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation: rewrite every call
  // argument that was src (modulo pointer casts) to be dest, inserting
  // pointer casts as needed so the callee's signature is unchanged.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == cpyDest->getType())
        CS.setArgument(i, cpyDest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.removeInstruction(C);

  // Remove the memcpy
  MD.removeInstruction(cpy);
  cpy->eraseFromParent();
  NumMemCpyInstr++;

  return true;
}
630
/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances).  This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();

  // There are two possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
  //   b) call-memcpy xform for return slot optimization.
  MemDepResult dep = MD.getDependency(M);
  if (!dep.isClobber())
    return false;
  if (!isa<MemCpyInst>(dep.getInst())) {
    // If the clobber is a call, try the call slot optimization instead;
    // anything else is out of reach for this pass.
    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
      return performCallSlotOptzn(M, C);
    return false;
  }

  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());

  // We can only transform memcpy's where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest())
    return false;

  // Second, the length of the memcpy's must be the same, or the preceding one
  // must be larger than the following one.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;

  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();

  if (DepSize < CpySize)
    return false;

  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first.  We also reject any overlap within either
  // memcpy's own operands, since the combined copy must behave like a memcpy.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
           != AliasAnalysis::NoAlias)
    return false;

  // If all checks passed, then we can transform these memcpy's: build a new
  // memcpy from the first copy's source directly into our dest.
  const Type *Ty = M->getLength()->getType();
  Function *MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID(), &Ty, 1);

  Value *Args[4] = {
    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
  };

  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);


  // If C and M don't interfere, then this is a valid transformation.  If they
  // did, this would mean that the two sources overlap, which would be bad.
  if (MD.getDependency(C) == dep) {
    MD.removeInstruction(M);
    M->eraseFromParent();
    NumMemCpyInstr++;
    return true;
  }

  // Otherwise, there was no point in doing this, so we remove the call we
  // inserted and act like nothing happened.
  MD.removeInstruction(C);
  C->eraseFromParent();
  return false;
}
711
712/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
713/// are guaranteed not to alias.
714bool MemCpyOpt::processMemMove(MemMoveInst *M) {
715  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
716
717  // If the memmove is a constant size, use it for the alias query, this allows
718  // us to optimize things like: memmove(P, P+64, 64);
719  uint64_t MemMoveSize = ~0ULL;
720  if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
721    MemMoveSize = Len->getZExtValue();
722
723  // See if the pointers alias.
724  if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
725      AliasAnalysis::NoAlias)
726    return false;
727
728  DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
729
730  // If not, then we know we can transform this.
731  Module *Mod = M->getParent()->getParent()->getParent();
732  const Type *Ty = M->getLength()->getType();
733  M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
734
735  // MemDep may have over conservative information about this instruction, just
736  // conservatively flush it from the cache.
737  getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
738
739  ++NumMoveToCpy;
740  return true;
741}
742
743
744// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
745bool MemCpyOpt::iterateOnFunction(Function &F) {
746  bool MadeChange = false;
747
748  // Walk all instruction in the function.
749  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
750    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
751         BI != BE;) {
752      // Avoid invalidating the iterator.
753      Instruction *I = BI++;
754
755      if (StoreInst *SI = dyn_cast<StoreInst>(I))
756        MadeChange |= processStore(SI, BI);
757      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
758        MadeChange |= processMemCpy(M);
759      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
760        if (processMemMove(M)) {
761          --BI;         // Reprocess the new memcpy.
762          MadeChange = true;
763        }
764      }
765    }
766  }
767
768  return MadeChange;
769}
770
771// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
772// function.
773//
774bool MemCpyOpt::runOnFunction(Function &F) {
775  bool MadeChange = false;
776  while (1) {
777    if (!iterateOnFunction(F))
778      break;
779    MadeChange = true;
780  }
781
782  return MadeChange;
783}
784
785
786
787