//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCSectionELF.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool>
RetainVectorOperands("nvptx-codegen-vectors",
    cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
    cl::init(true));

static cl::opt<bool>
sched4reg("nvptx-sched4reg",
          cl::desc("NVPTX Specific: schedule for register pressure"),
          cl::init(false));

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
    : TargetLowering(TM, new NVPTXTargetObjectFile()),
      nvTM(&TM),
      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy or memmove.
  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  // Jump is expensive. Don't create extra control flow for 'and' and 'or'
  // conditions in branches.
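  // (A note not in the original source: on a GPU, divergent branches
  // typically serialize execution within a warp, so it is usually cheaper
  // to evaluate both sides of a short condition than to branch around one.)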
  setJumpIsExpensive(true);

  // By default, use Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  if (RetainVectorOperands) {
    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8, Custom);

    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8, Custom);
  }

  // Operations not directly supported by NVPTX.
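  // (Marking these Expand lets the generic legalizer rewrite them into
  // supported sequences; e.g. SELECT_CC becomes SETCC + SELECT, and BR_CC
  // becomes SETCC + BRCOND.)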
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables jump table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load/store of predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setOperationAction(ISD::STORE, MVT::i1, Expand);
  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
  setTruncStoreAction(MVT::i8, MVT::i1, Expand);

  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // By default, CONCAT_VECTORS is implemented via store/load through the
  // stack. That is slow and uses local memory, so we need to custom-lower it.
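  // (See LowerCONCAT_VECTORS below, which builds the result from
  // EXTRACT_VECTOR_ELT / BUILD_VECTOR nodes instead, keeping everything in
  // registers.)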
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8, Custom);

  // Expand vector int-to-float and float-to-int conversions.
  // - For SINT_TO_FP and UINT_TO_FP, the source type
  //   (Node->getOperand(0).getValueType()) is used to determine the action,
  //   while for FP_TO_UINT and FP_TO_SINT, the destination type
  //   (Node->getValueType(0)) is used.
  //
  // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
  // case, and SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the
  // scalar case.
  //
  // That is why v4i32 and v2i32 are used here.
  //
  // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
  // (LegalizeVectorOps.cpp).
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);

  // Now deduce the information based on the above-mentioned actions.
  computeRegisterProperties();
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default:                           return 0;
  case NVPTXISD::CALL:               return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:           return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper:            return "NVPTXISD::Wrapper";
  case NVPTXISD::NVBuiltin:          return "NVPTXISD::NVBuiltin";
  case NVPTXISD::DeclareParam:       return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:         return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam:    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:          return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam:          return "NVPTXISD::LoadParam";
  case NVPTXISD::StoreParam:         return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamS32:      return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:      return "NVPTXISD::StoreParamU32";
  case NVPTXISD::MoveToParam:        return "NVPTXISD::MoveToParam";
  case NVPTXISD::CallArgBegin:       return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:            return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:        return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:         return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:           return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:            return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:         return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:          return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:          return "NVPTXISD::MoveParam";
  case NVPTXISD::MoveRetval:         return "NVPTXISD::MoveRetval";
  case NVPTXISD::MoveToRetval:       return "NVPTXISD::MoveToRetval";
  case NVPTXISD::StoreRetval:        return "NVPTXISD::StoreRetval";
  case NVPTXISD::PseudoUseParam:     return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:             return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:       return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:         return "NVPTXISD::CallSeqEnd";
  }
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}

std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID)
    O << "()";
  else {
    O << "(";
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
        unsigned size = 0;
        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
          size = ITy->getBitWidth();
          if (size < 32) size = 32;
        } else {
          assert(retTy->isFloatingPointTy() &&
                 "Floating point type expected here");
          size = retTy->getPrimitiveSizeInBits();
        }

        O << ".param .b" << size << " _";
      } else if (isa<PointerType>(retTy))
        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
      else {
        if ((retTy->getTypeID() == Type::StructTyID) ||
            isa<VectorType>(retTy)) {
          SmallVector<EVT, 16> vtparts;
          ComputeValueVTs(*this, retTy, vtparts);
          unsigned totalsz = 0;
          for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
            unsigned elems = 1;
            EVT elemtype = vtparts[i];
            if (vtparts[i].isVector()) {
              elems = vtparts[i].getVectorNumElements();
              elemtype = vtparts[i].getVectorElementType();
            }
            for (unsigned j = 0, je = elems; j != je; ++j) {
              unsigned sz = elemtype.getSizeInBits();
              if (elemtype.isInteger() && (sz < 8)) sz = 8;
              totalsz += sz / 8;
            }
          }
          O << ".param .align " << retAlignment
            << " .b8 _[" << totalsz << "]";
        } else {
          assert(false && "Unknown return type");
        }
      }
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, retTy, vtparts);
      unsigned idx = 0;
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " _";
          if (j < je - 1) O << ", ";
          ++idx;
        }
        if (i < e - 1)
          O << ", ";
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    const Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[i].Flags.isByVal() == false) {
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32) sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << ".param .b" << sz << " ";
      else
        O << ".reg .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI) {
      unsigned align = Outs[i].Flags.getByValAlign();
      unsigned sz = getTargetData()->getTypeAllocSize(ETy);
      O << ".param .align " << align << " .b8 ";
      O << "_";
      O << "[" << sz << "]";
      continue;
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " ";
          O << "_";
          if (j < je - 1) O << ", ";
        }
        if (i < e - 1)
          O << ", ";
      }
      continue;
    }
  }
  O << ");";
  return O.str();
}

SDValue
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  DebugLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.Args;
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, true));
  SDValue InFlag = Chain.getValue(1);

  assert((Outs.size() == Args.size()) &&
         "Unexpected number of arguments to function call");
  unsigned paramCount = 0;
  // Declare the .param or .reg variables needed to pass values to the
  // function.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    EVT VT = Outs[i].VT;

    if (Outs[i].Flags.isByVal() == false) {
      // Plain scalar.
      // For ABI, declare .param .b<size> .param<n>;
      // for non-ABI, declare .reg .b<size> .param<n>.
      unsigned isReg = 1;
      if (isABI)
        isReg = 0;
      unsigned sz = VT.getSizeInBits();
      if (VT.isInteger() && (sz < 32)) sz = 32;
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(isReg, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutVals[i],
                                 InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (isReg)
        opcode = NVPTXISD::MoveToParam;
      else {
        if (Outs[i].Flags.isZExt())
          opcode = NVPTXISD::StoreParamU32;
        else if (Outs[i].Flags.isSExt())
          opcode = NVPTXISD::StoreParamS32;
      }
      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // Struct or vector.
    SmallVector<EVT, 16> vtparts;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputeValueVTs(*this, PTy->getElementType(), vtparts);

    if (isABI) {
      // Declare .param .align 16 .b8 .param<n>[<size>];
      unsigned sz = Outs[i].Flags.getByValSize();
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      // The ByValAlign in Outs[i].Flags is always set at this point, so we
      // don't need to worry about natural alignment or not.
      // See TargetLowering::LowerCallTo().
      SDValue DeclareParamOps[] = { Chain,
                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      unsigned curOffset = 0;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
        unsigned elems = 1;
        EVT elemtype = vtparts[j];
        if (vtparts[j].isVector()) {
          elems = vtparts[j].getVectorNumElements();
          elemtype = vtparts[j].getVectorElementType();
        }
        for (unsigned k = 0, ke = elems; k != ke; ++k) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                        OutVals[i],
                                        DAG.getConstant(curOffset,
                                                        getPointerTy()));
          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(curOffset, MVT::i32),
                                     theVal, InFlag };
          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                              CopyParamOps, 5);
          InFlag = Chain.getValue(1);
          curOffset += sz / 8;
        }
      }
      ++paramCount;
      continue;
    }
    // Non-ABI struct or vector:
    // declare a bunch of .reg .b<size> .param<n>.
    unsigned curOffset = 0;
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      unsigned elems = 1;
      EVT elemtype = vtparts[j];
      if (vtparts[j].isVector()) {
        elems = vtparts[j].getVectorNumElements();
        elemtype = vtparts[j].getVectorElementType();
      }
      for (unsigned k = 0, ke = elems; k != ke; ++k) {
        unsigned sz = elemtype.getSizeInBits();
        if (elemtype.isInteger() && (sz < 32)) sz = 32;
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(1, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareParamOps, 5);
        InFlag = Chain.getValue(1);
        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
                                      DAG.getConstant(curOffset,
                                                      getPointerTy()));
        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue CopyParamOps[] = { Chain,
                                   DAG.getConstant(paramCount, MVT::i32),
                                   DAG.getConstant(0, MVT::i32), theVal,
                                   InFlag };
        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
                            CopyParamOps, 5);
        InFlag = Chain.getValue(1);
        ++paramCount;
      }
    }
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle the result.
  unsigned retCount = 0;
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI, or
    // individual .reg .b<size> func_retval<0..> for non-ABI.
    unsigned resultsz = 0;
    for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
      unsigned elems = 1;
      EVT elemtype = resvtparts[i];
      if (resvtparts[i].isVector()) {
        elems = resvtparts[i].getVectorNumElements();
        elemtype = resvtparts[i].getVectorElementType();
      }
      for (unsigned j = 0, je = elems; j != je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        if (isABI == false) {
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
        } else {
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
        }
        if (isABI == false) {
          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(retCount, MVT::i32),
                                      InFlag };
          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                              DeclareRetOps, 5);
          InFlag = Chain.getValue(1);
          ++retCount;
        }
        resultsz += sz;
      }
    }
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
          retTy->isPointerTy()) {
        // Scalar needs to be at least 32 bits wide.
        if (resultsz < 32)
          resultsz = 32;
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                    DAG.getConstant(resultsz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      } else {
        if (Func) { // direct call
          if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
            retAlignment = getTargetData()->getABITypeAlignment(retTy);
        } else { // indirect call
          const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
          if (!llvm::getAlign(*CallI, 0, retAlignment))
            retAlignment = getTargetData()->getABITypeAlignment(retTy);
        }
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain,
                                    DAG.getConstant(retAlignment, MVT::i32),
                                    DAG.getConstant(resultsz / 8, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
    }
  }

  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of
    // the form
    //   proto_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the
    // call instruction.
    // The prototype is embedded in a string and used as the operand of an
    // INLINEASM SDNode.
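    // For example (illustrative only, derived from getPrototype above): an
    // indirect ABI call to a function of type 'float (i32, float)' at the
    // first call site would produce
    //   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b32 _);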
    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
    const char *asmstr = nvTM->getManagedStrPool()->
        getManagedString(proto_string.c_str())->c_str();
    SDValue InlineAsmOps[] = { Chain,
                               DAG.getTargetExternalSymbol(asmstr,
                                                           getPointerTy()),
                               DAG.getMDNode(0),
                               DAG.getTargetConstant(0, MVT::i32), InFlag };
    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call".
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = { Chain,
                             DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1)
                                                   : retCount, MVT::i32),
                             InFlag };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name.
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list.
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps, 2);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32),
                             InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
                      3);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for the result.
  if (Ins.size() > 0) {
    if (isABI) {
      unsigned resoffset = 0;
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        unsigned sz = Ins[i].VT.getSizeInBits();
        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
        std::vector<EVT> LoadRetVTs;
        LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        std::vector<SDValue> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                     &LoadRetOps[0], LoadRetOps.size());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        InVals.push_back(retval);
        resoffset += sz / 8;
      }
    } else {
      SmallVector<EVT, 16> resvtparts;
      ComputeValueVTs(*this, retTy, resvtparts);

      assert(Ins.size() == resvtparts.size() &&
             "Unexpected number of return values in non-ABI case");
      unsigned paramNum = 0;
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        assert(EVT(Ins[i].VT) == resvtparts[i] &&
               "Unexpected EVT type in non-ABI case");
        unsigned numelems = 1;
        EVT elemtype = Ins[i].VT;
        if (Ins[i].VT.isVector()) {
          numelems = Ins[i].VT.getVectorNumElements();
          elemtype = Ins[i].VT.getVectorElementType();
        }
        std::vector<SDValue> tempRetVals;
        for (unsigned j = 0; j < numelems; ++j) {
          std::vector<EVT> MoveRetVTs;
          MoveRetVTs.push_back(elemtype);
          MoveRetVTs.push_back(MVT::Other);
          MoveRetVTs.push_back(MVT::Glue);
          std::vector<SDValue> MoveRetOps;
          MoveRetOps.push_back(Chain);
          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
          MoveRetOps.push_back(InFlag);
          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                       &MoveRetOps[0], MoveRetOps.size());
          Chain = retval.getValue(1);
          InFlag = retval.getValue(2);
          tempRetVals.push_back(retval);
          ++paramNum;
        }
        if (Ins[i].VT.isVector())
          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
                                       &tempRetVals[0], tempRetVals.size()));
        else
          InVals.push_back(tempRetVals[0]);
      }
    }
  }
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
                             InFlag);
  uniqueCallSite++;

  // Set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX.
  isTailCall = false;
  return Chain;
}

// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
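// (That expansion stores each operand element to a stack temporary and then
// reloads the whole vector, which on NVPTX means a round trip through local
// memory.)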
// We use extract/insert/build vector, just as LegalizeOp() does in llvm 2.5.
SDValue NVPTXTargetLowering::
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j = 0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j)));
    }
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
                     &Ops[0], Ops.size());
}

SDValue NVPTXTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR:        return SDValue();
  case ISD::FRAMEADDR:         return SDValue();
  case ISD::GlobalAddress:     return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS:    return LowerCONCAT_VECTORS(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}

SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
                                EVT v) const {
  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
  std::stringstream suffix;
  suffix << idx;
  *name += suffix.str();
  return DAG.getTargetExternalSymbol(name->c_str(), v);
}

SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  return getExtSymb(DAG, ".PARAM", idx, v);
}

SDValue
NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}

// Check to see if the kernel argument is image*_t or sampler_t.
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = {
    "struct._image2d_t",
    "struct._image3d_t",
    "struct._sampler_t"
  };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  const std::string TypeName = STy ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

SDValue
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const TargetData *TD = getTargetData();

  const Function *F = MF.getFunction();
  const AttrListPtr &PAL = F->getAttributes();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  assert(argTypes.size() == Ins.size() &&
         "Ins types and function types did not match");

  int idx = 0;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i, ++idx) {
    Type *Ty = argTypes[i];
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[i].VT &&
           "Ins type did not match function type");

    // If the kernel argument is image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later
    // be matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(theArgs[i],
                            (theArgs[i]->getParent() ?
                             theArgs[i]->getParent()->getParent() : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // Argument is dead.
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.paramHasAttr(i + 1, Attribute::ByVal) == false) {
      // A plain scalar.
      if (isABI || isKernel) {
        // If ABI, load from the param symbol.
        SDValue Arg = getParamSymbol(DAG, idx);
        Value *srcValue = new Argument(
            PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
                             llvm::ADDRESS_SPACE_PARAM));
        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                MachinePointerInfo(srcValue), false, false,
                                false,
                                TD->getABITypeAlignment(
                                    ObjectVT.getTypeForEVT(F->getContext())));
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx + 1);
        InVals.push_back(p);
      } else {
        // If no ABI, just move the param symbol.
        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx + 1);
        InVals.push_back(p);
      }
      continue;
    }

    // Param has the ByVal attribute.
    if (isABI || isKernel) {
      // Return MoveParam(param symbol).
      // Ideally, the param symbol could be returned directly, but when the
      // SDNode builder decides to use it in a CopyToReg(), the machine
      // instruction fails because TargetExternalSymbol (not lowered) is
      // target dependent, and CopyToReg assumes the source is lowered.
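      // (getParamSymbol returns a TargetExternalSymbol named ".PARAM<idx>";
      // see getExtSymb above, which appends the index to the base name.)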
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
      if (p.getNode())
        DAG.AssignOrdering(p.getNode(), idx + 1);
      if (isKernel)
        InVals.push_back(p);
      else {
        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
                                 p);
        InVals.push_back(p2);
      }
    } else {
      // Have to move a set of param symbols to registers, store them
      // locally, and return the local pointer in InVals.
      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
      assert(elemPtrType &&
             "Byval parameter should be a pointer type");
      Type *elemType = elemPtrType->getElementType();
      // Compute the constituent parts.
      SmallVector<EVT, 16> vtparts;
      SmallVector<uint64_t, 16> offsets;
      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
      unsigned totalsize = 0;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
        totalsize += vtparts[j].getStoreSizeInBits();
      SDValue localcopy = DAG.getFrameIndex(
          MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
          getPointerTy());
      unsigned sizesofar = 0;
      std::vector<SDValue> theChains;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
        unsigned numElems = 1;
        if (vtparts[j].isVector())
          numElems = vtparts[j].getVectorNumElements();
        for (unsigned k = 0, ke = numElems; k != ke; ++k) {
          EVT tmpvt = vtparts[j];
          if (tmpvt.isVector())
            tmpvt = tmpvt.getVectorElementType();
          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                    getParamSymbol(DAG, idx, tmpvt));
          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
                                     DAG.getConstant(sizesofar,
                                                     getPointerTy()));
          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
                                           MachinePointerInfo(), false, false,
                                           0));
          sizesofar += tmpvt.getStoreSizeInBits() / 8;
          ++idx;
        }
      }
      --idx;
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
                          theChains.size());
      InVals.push_back(localcopy);
    }
  }

  // Clang will check explicit varargs and issue an error if there are any.
  // However, Clang will let code with an implicit vararg list like f() pass.
  // We treat this case as if the arg list is empty.
  //if (F.isVarArg()) {
  //  assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &OutChains[0], OutChains.size()));

  return Chain;
}

SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  unsigned sizesofar = 0;
  unsigned idx = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue theVal = OutVals[i];
    EVT theValType = theVal.getValueType();
    unsigned numElems = 1;
    if (theValType.isVector())
      numElems = theValType.getVectorNumElements();
    for (unsigned j = 0, je = numElems; j != je; ++j) {
      SDValue tmpval = theVal;
      if (theValType.isVector())
        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                             theValType.getVectorElementType(),
                             tmpval, DAG.getIntPtrConstant(j));
      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval
                                : NVPTXISD::MoveToRetval,
                          dl, MVT::Other, Chain,
                          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
                          tmpval);
      if (theValType.isVector())
        sizesofar +=
            theValType.getVectorElementType().getStoreSizeInBits() / 8;
      else
        sizesofar += theValType.getStoreSizeInBits() / 8;
      ++idx;
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}

void
NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// NVPTX supports vectors of legal types of any length in intrinsics, because
// the NVPTX-specific type legalizer will legalize them to a PTX-supported
// length.
bool
NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}

// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer; in particular, the address
// space information.
bool
NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                                        unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = MVT::i32;
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 0;
    return true;
  }
  return false;
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target-specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp).
bool
NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                           Type *Ty) const {

  // AddrMode - This represents an addressing mode of:
  //   BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]

  if (AM.BaseGV) {
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed.
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed.
    return false;
  }
  return true;
}

//===----------------------------------------------------------------------===//
// NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                  EVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'c':
      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  return 4; // log2 alignment of 4, i.e. 16-byte alignment.
}