//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//
146c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
156c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne#include "CGCUDARuntime.h"
16a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "CodeGenFunction.h"
17a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "CodeGenModule.h"
18a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "clang/AST/Decl.h"
193b844ba7d5be205a9b4f5f0b0d1b7978977f4b8cChandler Carruth#include "llvm/IR/BasicBlock.h"
20651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#include "llvm/IR/CallSite.h"
213b844ba7d5be205a9b4f5f0b0d1b7978977f4b8cChandler Carruth#include "llvm/IR/Constants.h"
223b844ba7d5be205a9b4f5f0b0d1b7978977f4b8cChandler Carruth#include "llvm/IR/DerivedTypes.h"
23a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include <vector>
246c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
256c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourneusing namespace clang;
266c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourneusing namespace CodeGen;
276c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
286c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbournenamespace {
296c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
306c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourneclass CGNVCUDARuntime : public CGCUDARuntime {
31a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
32a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourneprivate:
33a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::Type *IntTy, *SizeTy;
34a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::PointerType *CharPtrTy, *VoidPtrTy;
35a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
36a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::Constant *getSetupArgumentFn() const;
37a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::Constant *getLaunchFn() const;
38a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
396c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbournepublic:
406c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne  CGNVCUDARuntime(CodeGenModule &CGM);
41a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
42651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) override;
436c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne};
446c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
456c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne}
466c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
476c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter CollingbourneCGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
48a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  CodeGen::CodeGenTypes &Types = CGM.getTypes();
49a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  ASTContext &Ctx = CGM.getContext();
50a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
51a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  IntTy = Types.ConvertType(Ctx.IntTy);
52a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  SizeTy = Types.ConvertType(Ctx.getSizeType());
53a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
54a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
55a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
56a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne}
57a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
58a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbournellvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
59a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  // cudaError_t cudaSetupArgument(void *, size_t, size_t)
60a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  std::vector<llvm::Type*> Params;
61a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  Params.push_back(VoidPtrTy);
62a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  Params.push_back(SizeTy);
63a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  Params.push_back(SizeTy);
64a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
65a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne                                                           Params, false),
66a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne                                   "cudaSetupArgument");
67a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne}
68a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
69a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbournellvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
70a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  // cudaError_t cudaLaunch(char *)
71a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  std::vector<llvm::Type*> Params;
72a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  Params.push_back(CharPtrTy);
73a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
74a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne                                                           Params, false),
75a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne                                   "cudaLaunch");
76a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne}
77a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
78a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbournevoid CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
79a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne                                         FunctionArgList &Args) {
80a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  // Build the argument value list and the argument stack struct type.
81cfa88f893915ceb8ae4ce2f17c46c24a4d67502fDmitri Gribenko  SmallVector<llvm::Value *, 16> ArgValues;
82a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  std::vector<llvm::Type *> ArgTypes;
83a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
84a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne       I != E; ++I) {
85a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
86a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    ArgValues.push_back(V);
87a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
88a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
89a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  }
90a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::StructType *ArgStackTy = llvm::StructType::get(
91a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne      CGF.getLLVMContext(), ArgTypes);
92a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
93a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
94a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
95a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  // Emit the calls to cudaSetupArgument
96a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
97a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  for (unsigned I = 0, E = Args.size(); I != E; ++I) {
98a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    llvm::Value *Args[3];
99a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
100a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
101a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    Args[1] = CGF.Builder.CreateIntCast(
102a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne        llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
103a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne        SizeTy, false);
104a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    Args[2] = CGF.Builder.CreateIntCast(
105a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne        llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
106a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne        SizeTy, false);
107bd7370a78604e9a20d698bfe328c1e43f12a0613John McCall    llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
108a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
109a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
110a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
111a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne    CGF.EmitBlock(NextBlock);
112a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  }
113a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
114a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  // Emit the call to cudaLaunch
115a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::Constant *cudaLaunchFn = getLaunchFn();
116a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
117bd7370a78604e9a20d698bfe328c1e43f12a0613John McCall  CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
118a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  CGF.EmitBranch(EndBlock);
119a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne
120a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne  CGF.EmitBlock(EndBlock);
1216c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne}
1226c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne
1236c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter CollingbourneCGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
1246c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne  return new CGNVCUDARuntime(CGM);
1256c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne}
126