NVPTXLowerAggrCopies.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
// the size is large or is not a compile-time constant.
//
//===----------------------------------------------------------------------===//
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "NVPTXLowerAggrCopies.h"
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/Constants.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/DataLayout.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/Function.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/IRBuilder.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/InstIterator.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/Instructions.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/IntrinsicInst.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/Intrinsics.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/LLVMContext.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "llvm/IR/Module.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgusing namespace llvm;
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgnamespace llvm { FunctionPass *createLowerAggrCopies(); }
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgchar NVPTXLowerAggrCopies::ID = 0;
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Lower MemTransferInst or load-store pair to loop
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void convertTransferToLoop(
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //unsigned numLoads,
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  Type *indType = len->getType();
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  BasicBlock *origBB = splitAt->getParent();
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  origBB->getTerminator()->setSuccessor(0, loopBB);
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  IRBuilder<> builder(origBB, origBB->getTerminator());
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // srcAddr and dstAddr are expected to be pointer types,
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // so no check is made here.
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Cast pointers to (char *)
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  IRBuilder<> loop(loopBB);
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // The loop index (ind) is a phi node.
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  PHINode *ind = loop.CreatePHI(indType, 0);
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Incoming value for ind is 0
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ind->addIncoming(ConstantInt::get(indType, 0), origBB);
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // load from srcAddr+ind
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // store at dstAddr+ind
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // The value for ind coming from backedge is (ind + 1)
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ind->addIncoming(newind, loopBB);
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Lower MemSetInst to loop
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                Value *len, Value *val, LLVMContext &Context,
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                Function &F) {
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  BasicBlock *origBB = splitAt->getParent();
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  origBB->getTerminator()->setSuccessor(0, loopBB);
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  IRBuilder<> builder(origBB, origBB->getTerminator());
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Cast pointer to the type of value getting stored
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  dstAddr =
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  IRBuilder<> loop(loopBB);
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  PHINode *ind = loop.CreatePHI(len->getType(), 0);
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ind->addIncoming(newind, loopBB);
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  SmallVector<LoadInst *, 4> aggrLoads;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  SmallVector<MemTransferInst *, 4> aggrMemcpys;
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  SmallVector<MemSetInst *, 4> aggrMemsets;
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  LLVMContext &Context = F.getParent()->getContext();
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  //
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  //
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  //const BasicBlock *firstBB = &F.front();  // first BB in F
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //BasicBlock *bb = BI;
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         ++II) {
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (load->hasOneUse() == false)
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          continue;
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          continue;
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        User *use = load->user_back();
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (store->getOperand(0) != load) //getValueOperand
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          aggrLoads.push_back(load);
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Value *len = intr->getLength();
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If the number of elements being copied is greater
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // than MaxAggrCopySize, lower it to a loop
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (len_int->getZExtValue() >= MaxAggrCopySize) {
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            aggrMemcpys.push_back(intr);
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          }
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          // turn variable length memcpy/memmov into loop
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          aggrMemcpys.push_back(intr);
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Value *len = memsetintr->getLength();
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (len_int->getZExtValue() >= MaxAggrCopySize) {
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            aggrMemsets.push_back(memsetintr);
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          }
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          // turn variable length memset into loop
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          aggrMemsets.push_back(memsetintr);
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      }
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      (aggrMemsets.size() == 0))
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return false;
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  //
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Do the transformation of an aggr load/copy/set to a loop
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  //
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    LoadInst *load = aggrLoads[i];
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Value *srcAddr = load->getOperand(0);
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Value *dstAddr = store->getOperand(1);
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    unsigned numLoads = DL->getTypeStoreSize(load->getType());
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          store->isVolatile(), Context, F);
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    store->eraseFromParent();
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    load->eraseFromParent();
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    MemTransferInst *cpy = aggrMemcpys[i];
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Value *len = cpy->getLength();
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // llvm 2.7 version of memcpy does not have volatile
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // operand yet. So always making it non-volatile
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // optimistically, so that we don't see unnecessary
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // st.volatile in ptx
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          false, Context, F);
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    cpy->eraseFromParent();
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    MemSetInst *memsetinst = aggrMemsets[i];
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Value *len = memsetinst->getLength();
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Value *val = memsetinst->getValue();
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        F);
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    memsetinst->eraseFromParent();
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return true;
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgFunctionPass *llvm::createLowerAggrCopies() {
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return new NVPTXLowerAggrCopies();
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
206