CGCUDANV.cpp revision bd7370a78604e9a20d698bfe328c1e43f12a0613
//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
126c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne// 136c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne//===----------------------------------------------------------------------===// 146c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 156c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne#include "CGCUDARuntime.h" 16a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "CodeGenFunction.h" 17a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "CodeGenModule.h" 18a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "clang/AST/Decl.h" 193b844ba7d5be205a9b4f5f0b0d1b7978977f4b8cChandler Carruth#include "llvm/IR/BasicBlock.h" 203b844ba7d5be205a9b4f5f0b0d1b7978977f4b8cChandler Carruth#include "llvm/IR/Constants.h" 213b844ba7d5be205a9b4f5f0b0d1b7978977f4b8cChandler Carruth#include "llvm/IR/DerivedTypes.h" 22a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include "llvm/Support/CallSite.h" 23a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne#include <vector> 246c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 256c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourneusing namespace clang; 266c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourneusing namespace CodeGen; 276c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 286c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbournenamespace { 296c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 306c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourneclass CGNVCUDARuntime : public CGCUDARuntime { 31a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 32a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourneprivate: 33a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Type *IntTy, *SizeTy; 34a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::PointerType *CharPtrTy, *VoidPtrTy; 35a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 
36a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Constant *getSetupArgumentFn() const; 37a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Constant *getLaunchFn() const; 38a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 396c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbournepublic: 406c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne CGNVCUDARuntime(CodeGenModule &CGM); 41a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 42a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); 436c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne}; 446c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 456c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne} 466c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 476c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter CollingbourneCGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) { 48a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne CodeGen::CodeGenTypes &Types = CGM.getTypes(); 49a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne ASTContext &Ctx = CGM.getContext(); 50a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 51a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne IntTy = Types.ConvertType(Ctx.IntTy); 52a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne SizeTy = Types.ConvertType(Ctx.getSizeType()); 53a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 54a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); 55a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); 56a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne} 57a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 58a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter 
Collingbournellvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { 59a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne // cudaError_t cudaSetupArgument(void *, size_t, size_t) 60a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne std::vector<llvm::Type*> Params; 61a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Params.push_back(VoidPtrTy); 62a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Params.push_back(SizeTy); 63a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Params.push_back(SizeTy); 64a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, 65a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Params, false), 66a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne "cudaSetupArgument"); 67a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne} 68a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 69a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbournellvm::Constant *CGNVCUDARuntime::getLaunchFn() const { 70a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne // cudaError_t cudaLaunch(char *) 71a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne std::vector<llvm::Type*> Params; 72a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Params.push_back(CharPtrTy); 73a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, 74a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Params, false), 75a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne "cudaLaunch"); 76a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne} 77a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 78a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbournevoid CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, 79a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne FunctionArgList &Args) { 
80a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne // Build the argument value list and the argument stack struct type. 81cfa88f893915ceb8ae4ce2f17c46c24a4d67502fDmitri Gribenko SmallVector<llvm::Value *, 16> ArgValues; 82a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne std::vector<llvm::Type *> ArgTypes; 83a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); 84a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne I != E; ++I) { 85a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Value *V = CGF.GetAddrOfLocalVar(*I); 86a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne ArgValues.push_back(V); 87a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType"); 88a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType()); 89a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne } 90a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::StructType *ArgStackTy = llvm::StructType::get( 91a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne CGF.getLLVMContext(), ArgTypes); 92a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 93a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); 94a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 95a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne // Emit the calls to cudaSetupArgument 96a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); 97a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne for (unsigned I = 0, E = Args.size(); I != E; ++I) { 98a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Value *Args[3]; 99a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter 
Collingbourne llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); 100a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy); 101a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Args[1] = CGF.Builder.CreateIntCast( 102a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::ConstantExpr::getSizeOf(ArgTypes[I]), 103a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne SizeTy, false); 104a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne Args[2] = CGF.Builder.CreateIntCast( 105a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::ConstantExpr::getOffsetOf(ArgStackTy, I), 106a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne SizeTy, false); 107bd7370a78604e9a20d698bfe328c1e43f12a0613John McCall llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); 108a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); 109a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); 110a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); 111a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne CGF.EmitBlock(NextBlock); 112a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne } 113a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 114a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne // Emit the call to cudaLaunch 115a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Constant *cudaLaunchFn = getLaunchFn(); 116a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); 117bd7370a78604e9a20d698bfe328c1e43f12a0613John McCall CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); 118a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter 
Collingbourne CGF.EmitBranch(EndBlock); 119a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne 120a4ae2294b6ebfb2554aacb6a6a0682fb5ed1f276Peter Collingbourne CGF.EmitBlock(EndBlock); 1216c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne} 1226c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne 1236c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter CollingbourneCGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { 1246c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne return new CGNVCUDARuntime(CGM); 1256c0aa5ff6e6253db0f993053599e2a52b5b93b2dPeter Collingbourne} 126