1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines an instruction selector for the NVPTX target. 11// 12//===----------------------------------------------------------------------===// 13 14#include "NVPTXISelDAGToDAG.h" 15#include "NVPTXUtilities.h" 16#include "llvm/Analysis/ValueTracking.h" 17#include "llvm/IR/GlobalValue.h" 18#include "llvm/IR/Instructions.h" 19#include "llvm/Support/CommandLine.h" 20#include "llvm/Support/Debug.h" 21#include "llvm/Support/ErrorHandling.h" 22#include "llvm/Support/raw_ostream.h" 23#include "llvm/Target/TargetIntrinsicInfo.h" 24 25using namespace llvm; 26 27#define DEBUG_TYPE "nvptx-isel" 28 29static cl::opt<int> UsePrecDivF32( 30 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, 31 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" 32 " IEEE Compliant F32 div.rnd if available."), 33 cl::init(2)); 34 35static cl::opt<bool> 36UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, 37 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), 38 cl::init(true)); 39 40static cl::opt<bool> 41FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, 42 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), 43 cl::init(false)); 44 45 46/// createNVPTXISelDag - This pass converts a legalized DAG into a 47/// NVPTX-specific DAG, ready for instruction scheduling. 48FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, 49 llvm::CodeGenOpt::Level OptLevel) { 50 return new NVPTXDAGToDAGISel(TM, OptLevel); 51} 52 53NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, 54 CodeGenOpt::Level OptLevel) 55 : SelectionDAGISel(tm, OptLevel), TM(tm) { 56 doMulWide = (OptLevel > 0); 57} 58 59bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget()); 61 return SelectionDAGISel::runOnMachineFunction(MF); 62} 63 64int NVPTXDAGToDAGISel::getDivF32Level() const { 65 if (UsePrecDivF32.getNumOccurrences() > 0) { 66 // If nvptx-prec-div32=N is used on the command-line, always honor it 67 return UsePrecDivF32; 68 } else { 69 // Otherwise, use div.approx if fast math is enabled 70 if (TM.Options.UnsafeFPMath) 71 return 0; 72 else 73 return 2; 74 } 75} 76 77bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { 78 if (UsePrecSqrtF32.getNumOccurrences() > 0) { 79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it 80 return UsePrecSqrtF32; 81 } else { 82 // Otherwise, use sqrt.approx if fast math is enabled 83 return !TM.Options.UnsafeFPMath; 84 } 85} 86 87bool NVPTXDAGToDAGISel::useF32FTZ() const { 88 if (FtzEnabled.getNumOccurrences() > 0) { 89 // If nvptx-f32ftz is used on the command-line, always honor it 90 return FtzEnabled; 91 } else { 92 const Function *F = MF->getFunction(); 93 // Otherwise, check for an nvptx-f32ftz attribute on the function 94 if (F->hasFnAttribute("nvptx-f32ftz")) 95 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; 96 else 97 return false; 98 } 99} 100 101bool NVPTXDAGToDAGISel::allowFMA() const { 102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); 103 return TL->allowFMA(*MF, OptLevel); 104} 105 106/// Select - Select instructions not customized! Used for 107/// expanded, promoted and normal instructions. 108SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { 109 110 if (N->isMachineOpcode()) { 111 N->setNodeId(-1); 112 return nullptr; // Already selected. 113 } 114 115 SDNode *ResNode = nullptr; 116 switch (N->getOpcode()) { 117 case ISD::LOAD: 118 ResNode = SelectLoad(N); 119 break; 120 case ISD::STORE: 121 ResNode = SelectStore(N); 122 break; 123 case NVPTXISD::LoadV2: 124 case NVPTXISD::LoadV4: 125 ResNode = SelectLoadVector(N); 126 break; 127 case NVPTXISD::LDGV2: 128 case NVPTXISD::LDGV4: 129 case NVPTXISD::LDUV2: 130 case NVPTXISD::LDUV4: 131 ResNode = SelectLDGLDU(N); 132 break; 133 case NVPTXISD::StoreV2: 134 case NVPTXISD::StoreV4: 135 ResNode = SelectStoreVector(N); 136 break; 137 case NVPTXISD::LoadParam: 138 case NVPTXISD::LoadParamV2: 139 case NVPTXISD::LoadParamV4: 140 ResNode = SelectLoadParam(N); 141 break; 142 case NVPTXISD::StoreRetval: 143 case NVPTXISD::StoreRetvalV2: 144 case NVPTXISD::StoreRetvalV4: 145 ResNode = SelectStoreRetval(N); 146 break; 147 case NVPTXISD::StoreParam: 148 case NVPTXISD::StoreParamV2: 149 case NVPTXISD::StoreParamV4: 150 case NVPTXISD::StoreParamS32: 151 case NVPTXISD::StoreParamU32: 152 ResNode = SelectStoreParam(N); 153 break; 154 case ISD::INTRINSIC_WO_CHAIN: 155 ResNode = SelectIntrinsicNoChain(N); 156 break; 157 case ISD::INTRINSIC_W_CHAIN: 158 ResNode = SelectIntrinsicChain(N); 159 break; 160 case NVPTXISD::Tex1DFloatS32: 161 case NVPTXISD::Tex1DFloatFloat: 162 case NVPTXISD::Tex1DFloatFloatLevel: 163 case NVPTXISD::Tex1DFloatFloatGrad: 164 case NVPTXISD::Tex1DS32S32: 165 case NVPTXISD::Tex1DS32Float: 166 case NVPTXISD::Tex1DS32FloatLevel: 167 case NVPTXISD::Tex1DS32FloatGrad: 168 case NVPTXISD::Tex1DU32S32: 169 case NVPTXISD::Tex1DU32Float: 170 case NVPTXISD::Tex1DU32FloatLevel: 171 case NVPTXISD::Tex1DU32FloatGrad: 172 case NVPTXISD::Tex1DArrayFloatS32: 173 case NVPTXISD::Tex1DArrayFloatFloat: 174 case NVPTXISD::Tex1DArrayFloatFloatLevel: 175 case NVPTXISD::Tex1DArrayFloatFloatGrad: 176 case NVPTXISD::Tex1DArrayS32S32: 177 case NVPTXISD::Tex1DArrayS32Float: 178 case NVPTXISD::Tex1DArrayS32FloatLevel: 179 case NVPTXISD::Tex1DArrayS32FloatGrad: 180 case NVPTXISD::Tex1DArrayU32S32: 181 case NVPTXISD::Tex1DArrayU32Float: 182 case NVPTXISD::Tex1DArrayU32FloatLevel: 183 case NVPTXISD::Tex1DArrayU32FloatGrad: 184 case NVPTXISD::Tex2DFloatS32: 185 case NVPTXISD::Tex2DFloatFloat: 186 case NVPTXISD::Tex2DFloatFloatLevel: 187 case NVPTXISD::Tex2DFloatFloatGrad: 188 case NVPTXISD::Tex2DS32S32: 189 case NVPTXISD::Tex2DS32Float: 190 case NVPTXISD::Tex2DS32FloatLevel: 191 case NVPTXISD::Tex2DS32FloatGrad: 192 case NVPTXISD::Tex2DU32S32: 193 case NVPTXISD::Tex2DU32Float: 194 case NVPTXISD::Tex2DU32FloatLevel: 195 case NVPTXISD::Tex2DU32FloatGrad: 196 case NVPTXISD::Tex2DArrayFloatS32: 197 case NVPTXISD::Tex2DArrayFloatFloat: 198 case NVPTXISD::Tex2DArrayFloatFloatLevel: 199 case NVPTXISD::Tex2DArrayFloatFloatGrad: 200 case NVPTXISD::Tex2DArrayS32S32: 201 case NVPTXISD::Tex2DArrayS32Float: 202 case NVPTXISD::Tex2DArrayS32FloatLevel: 203 case NVPTXISD::Tex2DArrayS32FloatGrad: 204 case NVPTXISD::Tex2DArrayU32S32: 205 case NVPTXISD::Tex2DArrayU32Float: 206 case NVPTXISD::Tex2DArrayU32FloatLevel: 207 case NVPTXISD::Tex2DArrayU32FloatGrad: 208 case NVPTXISD::Tex3DFloatS32: 209 case NVPTXISD::Tex3DFloatFloat: 210 case NVPTXISD::Tex3DFloatFloatLevel: 211 case NVPTXISD::Tex3DFloatFloatGrad: 212 case NVPTXISD::Tex3DS32S32: 213 case NVPTXISD::Tex3DS32Float: 214 case NVPTXISD::Tex3DS32FloatLevel: 215 case NVPTXISD::Tex3DS32FloatGrad: 216 case NVPTXISD::Tex3DU32S32: 217 case NVPTXISD::Tex3DU32Float: 218 case NVPTXISD::Tex3DU32FloatLevel: 219 case NVPTXISD::Tex3DU32FloatGrad: 220 case NVPTXISD::TexCubeFloatFloat: 221 case NVPTXISD::TexCubeFloatFloatLevel: 222 case NVPTXISD::TexCubeS32Float: 223 case NVPTXISD::TexCubeS32FloatLevel: 224 case NVPTXISD::TexCubeU32Float: 225 case NVPTXISD::TexCubeU32FloatLevel: 226 case NVPTXISD::TexCubeArrayFloatFloat: 227 case NVPTXISD::TexCubeArrayFloatFloatLevel: 228 case NVPTXISD::TexCubeArrayS32Float: 229 case NVPTXISD::TexCubeArrayS32FloatLevel: 230 case NVPTXISD::TexCubeArrayU32Float: 231 case NVPTXISD::TexCubeArrayU32FloatLevel: 232 case NVPTXISD::Tld4R2DFloatFloat: 233 case NVPTXISD::Tld4G2DFloatFloat: 234 case NVPTXISD::Tld4B2DFloatFloat: 235 case NVPTXISD::Tld4A2DFloatFloat: 236 case NVPTXISD::Tld4R2DS64Float: 237 case NVPTXISD::Tld4G2DS64Float: 238 case NVPTXISD::Tld4B2DS64Float: 239 case NVPTXISD::Tld4A2DS64Float: 240 case NVPTXISD::Tld4R2DU64Float: 241 case NVPTXISD::Tld4G2DU64Float: 242 case NVPTXISD::Tld4B2DU64Float: 243 case NVPTXISD::Tld4A2DU64Float: 244 case NVPTXISD::TexUnified1DFloatS32: 245 case NVPTXISD::TexUnified1DFloatFloat: 246 case NVPTXISD::TexUnified1DFloatFloatLevel: 247 case NVPTXISD::TexUnified1DFloatFloatGrad: 248 case NVPTXISD::TexUnified1DS32S32: 249 case NVPTXISD::TexUnified1DS32Float: 250 case NVPTXISD::TexUnified1DS32FloatLevel: 251 case NVPTXISD::TexUnified1DS32FloatGrad: 252 case NVPTXISD::TexUnified1DU32S32: 253 case NVPTXISD::TexUnified1DU32Float: 254 case NVPTXISD::TexUnified1DU32FloatLevel: 255 case NVPTXISD::TexUnified1DU32FloatGrad: 256 case NVPTXISD::TexUnified1DArrayFloatS32: 257 case NVPTXISD::TexUnified1DArrayFloatFloat: 258 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 259 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 260 case NVPTXISD::TexUnified1DArrayS32S32: 261 case NVPTXISD::TexUnified1DArrayS32Float: 262 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 263 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 264 case NVPTXISD::TexUnified1DArrayU32S32: 265 case NVPTXISD::TexUnified1DArrayU32Float: 266 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 267 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 268 case NVPTXISD::TexUnified2DFloatS32: 269 case NVPTXISD::TexUnified2DFloatFloat: 270 case NVPTXISD::TexUnified2DFloatFloatLevel: 271 case NVPTXISD::TexUnified2DFloatFloatGrad: 272 case NVPTXISD::TexUnified2DS32S32: 273 case NVPTXISD::TexUnified2DS32Float: 274 case NVPTXISD::TexUnified2DS32FloatLevel: 275 case NVPTXISD::TexUnified2DS32FloatGrad: 276 case NVPTXISD::TexUnified2DU32S32: 277 case NVPTXISD::TexUnified2DU32Float: 278 case NVPTXISD::TexUnified2DU32FloatLevel: 279 case NVPTXISD::TexUnified2DU32FloatGrad: 280 case NVPTXISD::TexUnified2DArrayFloatS32: 281 case NVPTXISD::TexUnified2DArrayFloatFloat: 282 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 283 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 284 case NVPTXISD::TexUnified2DArrayS32S32: 285 case NVPTXISD::TexUnified2DArrayS32Float: 286 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 287 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 288 case NVPTXISD::TexUnified2DArrayU32S32: 289 case NVPTXISD::TexUnified2DArrayU32Float: 290 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 291 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 292 case NVPTXISD::TexUnified3DFloatS32: 293 case NVPTXISD::TexUnified3DFloatFloat: 294 case NVPTXISD::TexUnified3DFloatFloatLevel: 295 case NVPTXISD::TexUnified3DFloatFloatGrad: 296 case NVPTXISD::TexUnified3DS32S32: 297 case NVPTXISD::TexUnified3DS32Float: 298 case NVPTXISD::TexUnified3DS32FloatLevel: 299 case NVPTXISD::TexUnified3DS32FloatGrad: 300 case NVPTXISD::TexUnified3DU32S32: 301 case NVPTXISD::TexUnified3DU32Float: 302 case NVPTXISD::TexUnified3DU32FloatLevel: 303 case NVPTXISD::TexUnified3DU32FloatGrad: 304 case NVPTXISD::TexUnifiedCubeFloatFloat: 305 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 306 case NVPTXISD::TexUnifiedCubeS32Float: 307 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 308 case NVPTXISD::TexUnifiedCubeU32Float: 309 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 310 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 311 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 312 case NVPTXISD::TexUnifiedCubeArrayS32Float: 313 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 314 case NVPTXISD::TexUnifiedCubeArrayU32Float: 315 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 316 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 317 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 318 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 319 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 320 case NVPTXISD::Tld4UnifiedR2DS64Float: 321 case NVPTXISD::Tld4UnifiedG2DS64Float: 322 case NVPTXISD::Tld4UnifiedB2DS64Float: 323 case NVPTXISD::Tld4UnifiedA2DS64Float: 324 case NVPTXISD::Tld4UnifiedR2DU64Float: 325 case NVPTXISD::Tld4UnifiedG2DU64Float: 326 case NVPTXISD::Tld4UnifiedB2DU64Float: 327 case NVPTXISD::Tld4UnifiedA2DU64Float: 328 ResNode = SelectTextureIntrinsic(N); 329 break; 330 case NVPTXISD::Suld1DI8Clamp: 331 case NVPTXISD::Suld1DI16Clamp: 332 case NVPTXISD::Suld1DI32Clamp: 333 case NVPTXISD::Suld1DI64Clamp: 334 case NVPTXISD::Suld1DV2I8Clamp: 335 case NVPTXISD::Suld1DV2I16Clamp: 336 case NVPTXISD::Suld1DV2I32Clamp: 337 case NVPTXISD::Suld1DV2I64Clamp: 338 case NVPTXISD::Suld1DV4I8Clamp: 339 case NVPTXISD::Suld1DV4I16Clamp: 340 case NVPTXISD::Suld1DV4I32Clamp: 341 case NVPTXISD::Suld1DArrayI8Clamp: 342 case NVPTXISD::Suld1DArrayI16Clamp: 343 case NVPTXISD::Suld1DArrayI32Clamp: 344 case NVPTXISD::Suld1DArrayI64Clamp: 345 case NVPTXISD::Suld1DArrayV2I8Clamp: 346 case NVPTXISD::Suld1DArrayV2I16Clamp: 347 case NVPTXISD::Suld1DArrayV2I32Clamp: 348 case NVPTXISD::Suld1DArrayV2I64Clamp: 349 case NVPTXISD::Suld1DArrayV4I8Clamp: 350 case NVPTXISD::Suld1DArrayV4I16Clamp: 351 case NVPTXISD::Suld1DArrayV4I32Clamp: 352 case NVPTXISD::Suld2DI8Clamp: 353 case NVPTXISD::Suld2DI16Clamp: 354 case NVPTXISD::Suld2DI32Clamp: 355 case NVPTXISD::Suld2DI64Clamp: 356 case NVPTXISD::Suld2DV2I8Clamp: 357 case NVPTXISD::Suld2DV2I16Clamp: 358 case NVPTXISD::Suld2DV2I32Clamp: 359 case NVPTXISD::Suld2DV2I64Clamp: 360 case NVPTXISD::Suld2DV4I8Clamp: 361 case NVPTXISD::Suld2DV4I16Clamp: 362 case NVPTXISD::Suld2DV4I32Clamp: 363 case NVPTXISD::Suld2DArrayI8Clamp: 364 case NVPTXISD::Suld2DArrayI16Clamp: 365 case NVPTXISD::Suld2DArrayI32Clamp: 366 case NVPTXISD::Suld2DArrayI64Clamp: 367 case NVPTXISD::Suld2DArrayV2I8Clamp: 368 case NVPTXISD::Suld2DArrayV2I16Clamp: 369 case NVPTXISD::Suld2DArrayV2I32Clamp: 370 case NVPTXISD::Suld2DArrayV2I64Clamp: 371 case NVPTXISD::Suld2DArrayV4I8Clamp: 372 case NVPTXISD::Suld2DArrayV4I16Clamp: 373 case NVPTXISD::Suld2DArrayV4I32Clamp: 374 case NVPTXISD::Suld3DI8Clamp: 375 case NVPTXISD::Suld3DI16Clamp: 376 case NVPTXISD::Suld3DI32Clamp: 377 case NVPTXISD::Suld3DI64Clamp: 378 case NVPTXISD::Suld3DV2I8Clamp: 379 case NVPTXISD::Suld3DV2I16Clamp: 380 case NVPTXISD::Suld3DV2I32Clamp: 381 case NVPTXISD::Suld3DV2I64Clamp: 382 case NVPTXISD::Suld3DV4I8Clamp: 383 case NVPTXISD::Suld3DV4I16Clamp: 384 case NVPTXISD::Suld3DV4I32Clamp: 385 case NVPTXISD::Suld1DI8Trap: 386 case NVPTXISD::Suld1DI16Trap: 387 case NVPTXISD::Suld1DI32Trap: 388 case NVPTXISD::Suld1DI64Trap: 389 case NVPTXISD::Suld1DV2I8Trap: 390 case NVPTXISD::Suld1DV2I16Trap: 391 case NVPTXISD::Suld1DV2I32Trap: 392 case NVPTXISD::Suld1DV2I64Trap: 393 case NVPTXISD::Suld1DV4I8Trap: 394 case NVPTXISD::Suld1DV4I16Trap: 395 case NVPTXISD::Suld1DV4I32Trap: 396 case NVPTXISD::Suld1DArrayI8Trap: 397 case NVPTXISD::Suld1DArrayI16Trap: 398 case NVPTXISD::Suld1DArrayI32Trap: 399 case NVPTXISD::Suld1DArrayI64Trap: 400 case NVPTXISD::Suld1DArrayV2I8Trap: 401 case NVPTXISD::Suld1DArrayV2I16Trap: 402 case NVPTXISD::Suld1DArrayV2I32Trap: 403 case NVPTXISD::Suld1DArrayV2I64Trap: 404 case NVPTXISD::Suld1DArrayV4I8Trap: 405 case NVPTXISD::Suld1DArrayV4I16Trap: 406 case NVPTXISD::Suld1DArrayV4I32Trap: 407 case NVPTXISD::Suld2DI8Trap: 408 case NVPTXISD::Suld2DI16Trap: 409 case NVPTXISD::Suld2DI32Trap: 410 case NVPTXISD::Suld2DI64Trap: 411 case NVPTXISD::Suld2DV2I8Trap: 412 case NVPTXISD::Suld2DV2I16Trap: 413 case NVPTXISD::Suld2DV2I32Trap: 414 case NVPTXISD::Suld2DV2I64Trap: 415 case NVPTXISD::Suld2DV4I8Trap: 416 case NVPTXISD::Suld2DV4I16Trap: 417 case NVPTXISD::Suld2DV4I32Trap: 418 case NVPTXISD::Suld2DArrayI8Trap: 419 case NVPTXISD::Suld2DArrayI16Trap: 420 case NVPTXISD::Suld2DArrayI32Trap: 421 case NVPTXISD::Suld2DArrayI64Trap: 422 case NVPTXISD::Suld2DArrayV2I8Trap: 423 case NVPTXISD::Suld2DArrayV2I16Trap: 424 case NVPTXISD::Suld2DArrayV2I32Trap: 425 case NVPTXISD::Suld2DArrayV2I64Trap: 426 case NVPTXISD::Suld2DArrayV4I8Trap: 427 case NVPTXISD::Suld2DArrayV4I16Trap: 428 case NVPTXISD::Suld2DArrayV4I32Trap: 429 case NVPTXISD::Suld3DI8Trap: 430 case NVPTXISD::Suld3DI16Trap: 431 case NVPTXISD::Suld3DI32Trap: 432 case NVPTXISD::Suld3DI64Trap: 433 case NVPTXISD::Suld3DV2I8Trap: 434 case NVPTXISD::Suld3DV2I16Trap: 435 case NVPTXISD::Suld3DV2I32Trap: 436 case NVPTXISD::Suld3DV2I64Trap: 437 case NVPTXISD::Suld3DV4I8Trap: 438 case NVPTXISD::Suld3DV4I16Trap: 439 case NVPTXISD::Suld3DV4I32Trap: 440 case NVPTXISD::Suld1DI8Zero: 441 case NVPTXISD::Suld1DI16Zero: 442 case NVPTXISD::Suld1DI32Zero: 443 case NVPTXISD::Suld1DI64Zero: 444 case NVPTXISD::Suld1DV2I8Zero: 445 case NVPTXISD::Suld1DV2I16Zero: 446 case NVPTXISD::Suld1DV2I32Zero: 447 case NVPTXISD::Suld1DV2I64Zero: 448 case NVPTXISD::Suld1DV4I8Zero: 449 case NVPTXISD::Suld1DV4I16Zero: 450 case NVPTXISD::Suld1DV4I32Zero: 451 case NVPTXISD::Suld1DArrayI8Zero: 452 case NVPTXISD::Suld1DArrayI16Zero: 453 case NVPTXISD::Suld1DArrayI32Zero: 454 case NVPTXISD::Suld1DArrayI64Zero: 455 case NVPTXISD::Suld1DArrayV2I8Zero: 456 case NVPTXISD::Suld1DArrayV2I16Zero: 457 case NVPTXISD::Suld1DArrayV2I32Zero: 458 case NVPTXISD::Suld1DArrayV2I64Zero: 459 case NVPTXISD::Suld1DArrayV4I8Zero: 460 case NVPTXISD::Suld1DArrayV4I16Zero: 461 case NVPTXISD::Suld1DArrayV4I32Zero: 462 case NVPTXISD::Suld2DI8Zero: 463 case NVPTXISD::Suld2DI16Zero: 464 case NVPTXISD::Suld2DI32Zero: 465 case NVPTXISD::Suld2DI64Zero: 466 case NVPTXISD::Suld2DV2I8Zero: 467 case NVPTXISD::Suld2DV2I16Zero: 468 case NVPTXISD::Suld2DV2I32Zero: 469 case NVPTXISD::Suld2DV2I64Zero: 470 case NVPTXISD::Suld2DV4I8Zero: 471 case NVPTXISD::Suld2DV4I16Zero: 472 case NVPTXISD::Suld2DV4I32Zero: 473 case NVPTXISD::Suld2DArrayI8Zero: 474 case NVPTXISD::Suld2DArrayI16Zero: 475 case NVPTXISD::Suld2DArrayI32Zero: 476 case NVPTXISD::Suld2DArrayI64Zero: 477 case NVPTXISD::Suld2DArrayV2I8Zero: 478 case NVPTXISD::Suld2DArrayV2I16Zero: 479 case NVPTXISD::Suld2DArrayV2I32Zero: 480 case NVPTXISD::Suld2DArrayV2I64Zero: 481 case NVPTXISD::Suld2DArrayV4I8Zero: 482 case NVPTXISD::Suld2DArrayV4I16Zero: 483 case NVPTXISD::Suld2DArrayV4I32Zero: 484 case NVPTXISD::Suld3DI8Zero: 485 case NVPTXISD::Suld3DI16Zero: 486 case NVPTXISD::Suld3DI32Zero: 487 case NVPTXISD::Suld3DI64Zero: 488 case NVPTXISD::Suld3DV2I8Zero: 489 case NVPTXISD::Suld3DV2I16Zero: 490 case NVPTXISD::Suld3DV2I32Zero: 491 case NVPTXISD::Suld3DV2I64Zero: 492 case NVPTXISD::Suld3DV4I8Zero: 493 case NVPTXISD::Suld3DV4I16Zero: 494 case NVPTXISD::Suld3DV4I32Zero: 495 ResNode = SelectSurfaceIntrinsic(N); 496 break; 497 case ISD::AND: 498 case ISD::SRA: 499 case ISD::SRL: 500 // Try to select BFE 501 ResNode = SelectBFE(N); 502 break; 503 case ISD::ADDRSPACECAST: 504 ResNode = SelectAddrSpaceCast(N); 505 break; 506 default: 507 break; 508 } 509 if (ResNode) 510 return ResNode; 511 return SelectCode(N); 512} 513 514SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) { 515 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 516 switch (IID) { 517 default: 518 return NULL; 519 case Intrinsic::nvvm_ldg_global_f: 520 case Intrinsic::nvvm_ldg_global_i: 521 case Intrinsic::nvvm_ldg_global_p: 522 case Intrinsic::nvvm_ldu_global_f: 523 case Intrinsic::nvvm_ldu_global_i: 524 case Intrinsic::nvvm_ldu_global_p: 525 return SelectLDGLDU(N); 526 } 527} 528 529static unsigned int getCodeAddrSpace(MemSDNode *N) { 530 const Value *Src = N->getMemOperand()->getValue(); 531 532 if (!Src) 533 return NVPTX::PTXLdStInstCode::GENERIC; 534 535 if (auto *PT = dyn_cast<PointerType>(Src->getType())) { 536 switch (PT->getAddressSpace()) { 537 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; 538 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; 539 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; 540 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; 541 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; 542 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; 543 default: break; 544 } 545 } 546 return NVPTX::PTXLdStInstCode::GENERIC; 547} 548 549static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, 550 unsigned CodeAddrSpace, MachineFunction *F) { 551 // To use non-coherent caching, the load has to be from global 552 // memory and we have to prove that the memory area is not written 553 // to anywhere for the duration of the kernel call, not even after 554 // the load. 555 // 556 // To ensure that there are no writes to the memory, we require the 557 // underlying pointer to be a noalias (__restrict) kernel parameter 558 // that is never used for a write. We can only do this for kernel 559 // functions since from within a device function, we cannot know if 560 // there were or will be writes to the memory from the caller - or we 561 // could, but then we would have to do inter-procedural analysis. 562 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL || 563 !isKernelFunction(*F->getFunction())) { 564 return false; 565 } 566 567 // We use GetUnderlyingObjects() here instead of 568 // GetUnderlyingObject() mainly because the former looks through phi 569 // nodes while the latter does not. We need to look through phi 570 // nodes to handle pointer induction variables. 571 SmallVector<Value *, 8> Objs; 572 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()), 573 Objs, F->getDataLayout()); 574 for (Value *Obj : Objs) { 575 auto *A = dyn_cast<const Argument>(Obj); 576 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false; 577 } 578 579 return true; 580} 581 582SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { 583 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 584 switch (IID) { 585 default: 586 return nullptr; 587 case Intrinsic::nvvm_texsurf_handle_internal: 588 return SelectTexSurfHandle(N); 589 } 590} 591 592SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { 593 // Op 0 is the intrinsic ID 594 SDValue Wrapper = N->getOperand(1); 595 SDValue GlobalVal = Wrapper.getOperand(0); 596 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, 597 GlobalVal); 598} 599 600SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 601 SDValue Src = N->getOperand(0); 602 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); 603 unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); 604 unsigned DstAddrSpace = CastN->getDestAddressSpace(); 605 606 assert(SrcAddrSpace != DstAddrSpace && 607 "addrspacecast must be between different address spaces"); 608 609 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { 610 // Specific to generic 611 unsigned Opc; 612 switch (SrcAddrSpace) { 613 default: report_fatal_error("Bad address space in addrspacecast"); 614 case ADDRESS_SPACE_GLOBAL: 615 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; 616 break; 617 case ADDRESS_SPACE_SHARED: 618 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes; 619 break; 620 case ADDRESS_SPACE_CONST: 621 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes; 622 break; 623 case ADDRESS_SPACE_LOCAL: 624 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes; 625 break; 626 } 627 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); 628 } else { 629 // Generic to specific 630 if (SrcAddrSpace != 0) 631 report_fatal_error("Cannot cast between two non-generic address spaces"); 632 unsigned Opc; 633 switch (DstAddrSpace) { 634 default: report_fatal_error("Bad address space in addrspacecast"); 635 case ADDRESS_SPACE_GLOBAL: 636 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 637 : NVPTX::cvta_to_global_yes; 638 break; 639 case ADDRESS_SPACE_SHARED: 640 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64 641 : NVPTX::cvta_to_shared_yes; 642 break; 643 case ADDRESS_SPACE_CONST: 644 Opc = 645 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes; 646 break; 647 case ADDRESS_SPACE_LOCAL: 648 Opc = 649 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; 650 break; 651 case ADDRESS_SPACE_PARAM: 652 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 653 : NVPTX::nvvm_ptr_gen_to_param; 654 break; 655 } 656 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); 657 } 658} 659 660SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { 661 SDLoc dl(N); 662 LoadSDNode *LD = cast<LoadSDNode>(N); 663 EVT LoadedVT = LD->getMemoryVT(); 664 SDNode *NVPTXLD = nullptr; 665 666 // do not support pre/post inc/dec 667 if (LD->isIndexed()) 668 return nullptr; 669 670 if (!LoadedVT.isSimple()) 671 return nullptr; 672 673 // Address Space Setting 674 unsigned int codeAddrSpace = getCodeAddrSpace(LD); 675 676 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) { 677 return SelectLDGLDU(N); 678 } 679 680 // Volatile Setting 681 // - .volatile is only availalble for .global and .shared 682 bool isVolatile = LD->isVolatile(); 683 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 684 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 685 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 686 isVolatile = false; 687 688 // Vector Setting 689 MVT SimpleVT = LoadedVT.getSimpleVT(); 690 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 691 if (SimpleVT.isVector()) { 692 unsigned num = SimpleVT.getVectorNumElements(); 693 if (num == 2) 694 vecType = NVPTX::PTXLdStInstCode::V2; 695 else if (num == 4) 696 vecType = NVPTX::PTXLdStInstCode::V4; 697 else 698 return nullptr; 699 } 700 701 // Type Setting: fromType + fromTypeWidth 702 // 703 // Sign : ISD::SEXTLOAD 704 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 705 // type is integer 706 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 707 MVT ScalarVT = SimpleVT.getScalarType(); 708 // Read at least 8 bits (predicates are stored as 8-bit values) 709 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 710 unsigned int fromType; 711 if ((LD->getExtensionType() == ISD::SEXTLOAD)) 712 fromType = NVPTX::PTXLdStInstCode::Signed; 713 else if (ScalarVT.isFloatingPoint()) 714 fromType = NVPTX::PTXLdStInstCode::Float; 715 else 716 fromType = NVPTX::PTXLdStInstCode::Unsigned; 717 718 // Create the machine instruction DAG 719 SDValue Chain = N->getOperand(0); 720 SDValue N1 = N->getOperand(1); 721 SDValue Addr; 722 SDValue Offset, Base; 723 unsigned Opcode; 724 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; 725 726 if (SelectDirectAddr(N1, Addr)) { 727 switch (TargetVT) { 728 case MVT::i8: 729 Opcode = NVPTX::LD_i8_avar; 730 break; 731 case MVT::i16: 732 Opcode = NVPTX::LD_i16_avar; 733 break; 734 case MVT::i32: 735 Opcode = NVPTX::LD_i32_avar; 736 break; 737 case MVT::i64: 738 Opcode = NVPTX::LD_i64_avar; 739 break; 740 case MVT::f32: 741 Opcode = NVPTX::LD_f32_avar; 742 break; 743 case MVT::f64: 744 Opcode = NVPTX::LD_f64_avar; 745 break; 746 default: 747 return nullptr; 748 } 749 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 750 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 751 getI32Imm(fromTypeWidth, dl), Addr, Chain }; 752 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 753 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) 754 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { 755 switch (TargetVT) { 756 case MVT::i8: 757 Opcode = NVPTX::LD_i8_asi; 758 break; 759 case MVT::i16: 760 Opcode = NVPTX::LD_i16_asi; 761 break; 762 case MVT::i32: 763 Opcode = NVPTX::LD_i32_asi; 764 break; 765 case MVT::i64: 766 Opcode = NVPTX::LD_i64_asi; 767 break; 768 case MVT::f32: 769 Opcode = NVPTX::LD_f32_asi; 770 break; 771 case MVT::f64: 772 Opcode = NVPTX::LD_f64_asi; 773 break; 774 default: 775 return nullptr; 776 } 777 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 778 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 779 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; 780 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 781 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset) 782 : SelectADDRri(N1.getNode(), N1, Base, Offset)) { 783 if (TM.is64Bit()) { 784 switch (TargetVT) { 785 case MVT::i8: 786 Opcode = NVPTX::LD_i8_ari_64; 787 break; 788 case MVT::i16: 789 Opcode = NVPTX::LD_i16_ari_64; 790 break; 791 case MVT::i32: 792 Opcode = NVPTX::LD_i32_ari_64; 793 break; 794 case MVT::i64: 795 Opcode = NVPTX::LD_i64_ari_64; 796 break; 797 case MVT::f32: 798 Opcode = NVPTX::LD_f32_ari_64; 799 break; 800 case MVT::f64: 801 Opcode = NVPTX::LD_f64_ari_64; 802 break; 803 default: 804 return nullptr; 805 } 806 } else { 807 switch (TargetVT) { 808 case MVT::i8: 809 Opcode = NVPTX::LD_i8_ari; 810 break; 811 case MVT::i16: 812 Opcode = NVPTX::LD_i16_ari; 813 break; 814 case MVT::i32: 815 Opcode = NVPTX::LD_i32_ari; 816 break; 817 case MVT::i64: 818 Opcode = NVPTX::LD_i64_ari; 819 break; 820 case MVT::f32: 821 Opcode = NVPTX::LD_f32_ari; 822 break; 823 case MVT::f64: 824 Opcode = NVPTX::LD_f64_ari; 825 break; 826 default: 827 return nullptr; 828 } 829 } 830 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 831 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 832 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; 833 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 834 } else { 835 if (TM.is64Bit()) { 836 switch (TargetVT) { 837 case MVT::i8: 838 Opcode = NVPTX::LD_i8_areg_64; 839 break; 840 case MVT::i16: 841 Opcode = NVPTX::LD_i16_areg_64; 842 break; 843 case MVT::i32: 844 Opcode = NVPTX::LD_i32_areg_64; 845 break; 846 case MVT::i64: 847 Opcode = NVPTX::LD_i64_areg_64; 848 break; 849 case MVT::f32: 850 Opcode = NVPTX::LD_f32_areg_64; 851 break; 852 case MVT::f64: 853 Opcode = NVPTX::LD_f64_areg_64; 854 break; 855 default: 856 return nullptr; 857 } 858 } else { 859 switch (TargetVT) { 860 case MVT::i8: 861 Opcode = NVPTX::LD_i8_areg; 862 break; 863 case MVT::i16: 864 Opcode = NVPTX::LD_i16_areg; 865 break; 866 case MVT::i32: 867 Opcode = NVPTX::LD_i32_areg; 868 break; 869 case MVT::i64: 870 Opcode = NVPTX::LD_i64_areg; 871 break; 872 case MVT::f32: 873 Opcode = NVPTX::LD_f32_areg; 874 break; 875 case MVT::f64: 876 Opcode = NVPTX::LD_f64_areg; 877 break; 878 default: 879 return nullptr; 880 } 881 } 882 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 883 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 884 getI32Imm(fromTypeWidth, dl), N1, Chain }; 885 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 886 } 887 888 if (NVPTXLD) { 889 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 890 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 891 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); 892 } 893 894 return NVPTXLD; 895} 896 897SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { 898 899 SDValue Chain = N->getOperand(0); 900 SDValue Op1 = N->getOperand(1); 901 SDValue Addr, Offset, Base; 902 unsigned Opcode; 903 SDLoc DL(N); 904 SDNode *LD; 905 MemSDNode *MemSD = cast<MemSDNode>(N); 906 EVT LoadedVT = MemSD->getMemoryVT(); 907 908 if (!LoadedVT.isSimple()) 909 return nullptr; 910 911 // Address Space Setting 912 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); 913 914 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { 915 return SelectLDGLDU(N); 916 } 917 918 // Volatile Setting 919 // - .volatile is only availalble for .global and .shared 920 bool IsVolatile = MemSD->isVolatile(); 921 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 922 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 923 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 924 IsVolatile = false; 925 926 // Vector Setting 927 MVT SimpleVT = LoadedVT.getSimpleVT(); 928 929 // Type Setting: fromType + fromTypeWidth 930 // 931 // Sign : ISD::SEXTLOAD 932 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 933 // type is integer 934 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 935 MVT ScalarVT = SimpleVT.getScalarType(); 936 // Read at least 8 bits (predicates are stored as 8-bit values) 937 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 938 unsigned int FromType; 939 // The last operand holds the original LoadSDNode::getExtensionType() value 940 unsigned ExtensionType = cast<ConstantSDNode>( 941 N->getOperand(N->getNumOperands() - 1))->getZExtValue(); 942 if (ExtensionType == ISD::SEXTLOAD) 943 FromType = NVPTX::PTXLdStInstCode::Signed; 944 else if (ScalarVT.isFloatingPoint()) 945 FromType = NVPTX::PTXLdStInstCode::Float; 946 else 947 FromType = NVPTX::PTXLdStInstCode::Unsigned; 948 949 unsigned VecType; 950 951 switch (N->getOpcode()) { 952 case NVPTXISD::LoadV2: 953 VecType = NVPTX::PTXLdStInstCode::V2; 954 break; 955 case NVPTXISD::LoadV4: 956 VecType = NVPTX::PTXLdStInstCode::V4; 957 break; 958 default: 959 return nullptr; 960 } 961 962 EVT EltVT = N->getValueType(0); 963 964 if (SelectDirectAddr(Op1, Addr)) { 965 switch (N->getOpcode()) { 966 default: 967 return nullptr; 968 case NVPTXISD::LoadV2: 969 switch (EltVT.getSimpleVT().SimpleTy) { 970 default: 971 return nullptr; 972 case MVT::i8: 973 Opcode = NVPTX::LDV_i8_v2_avar; 974 break; 975 case MVT::i16: 976 Opcode = NVPTX::LDV_i16_v2_avar; 977 break; 978 case MVT::i32: 979 Opcode = NVPTX::LDV_i32_v2_avar; 980 break; 981 case MVT::i64: 982 Opcode = NVPTX::LDV_i64_v2_avar; 983 break; 984 case MVT::f32: 985 Opcode = NVPTX::LDV_f32_v2_avar; 986 break; 987 case MVT::f64: 988 Opcode = NVPTX::LDV_f64_v2_avar; 989 break; 990 } 991 break; 992 case NVPTXISD::LoadV4: 993 switch (EltVT.getSimpleVT().SimpleTy) { 994 default: 995 return nullptr; 996 case MVT::i8: 997 Opcode = NVPTX::LDV_i8_v4_avar; 998 break; 999 case MVT::i16: 1000 Opcode = NVPTX::LDV_i16_v4_avar; 1001 break; 1002 case MVT::i32: 1003 Opcode = NVPTX::LDV_i32_v4_avar; 1004 break; 1005 case MVT::f32: 1006 Opcode = NVPTX::LDV_f32_v4_avar; 1007 break; 1008 } 1009 break; 1010 } 1011 1012 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1013 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1014 getI32Imm(FromTypeWidth, DL), Addr, Chain }; 1015 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1016 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) 1017 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { 1018 switch (N->getOpcode()) { 1019 default: 1020 return nullptr; 1021 case NVPTXISD::LoadV2: 1022 switch (EltVT.getSimpleVT().SimpleTy) { 1023 default: 1024 return nullptr; 1025 case MVT::i8: 1026 Opcode = NVPTX::LDV_i8_v2_asi; 1027 break; 1028 case MVT::i16: 1029 Opcode = NVPTX::LDV_i16_v2_asi; 1030 break; 1031 case MVT::i32: 1032 Opcode = NVPTX::LDV_i32_v2_asi; 1033 break; 1034 case MVT::i64: 1035 Opcode = NVPTX::LDV_i64_v2_asi; 1036 break; 1037 case MVT::f32: 1038 Opcode = NVPTX::LDV_f32_v2_asi; 1039 break; 1040 case MVT::f64: 1041 Opcode = NVPTX::LDV_f64_v2_asi; 1042 break; 1043 } 1044 break; 1045 case NVPTXISD::LoadV4: 1046 switch (EltVT.getSimpleVT().SimpleTy) { 1047 default: 1048 return nullptr; 1049 case MVT::i8: 1050 Opcode = NVPTX::LDV_i8_v4_asi; 1051 break; 1052 case MVT::i16: 1053 Opcode = NVPTX::LDV_i16_v4_asi; 1054 break; 1055 case MVT::i32: 1056 Opcode = NVPTX::LDV_i32_v4_asi; 1057 break; 1058 case MVT::f32: 1059 Opcode = NVPTX::LDV_f32_v4_asi; 1060 break; 1061 } 1062 break; 1063 } 1064 1065 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1066 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1067 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; 1068 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1069 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1070 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1071 if (TM.is64Bit()) { 1072 switch (N->getOpcode()) { 1073 default: 1074 return nullptr; 1075 case NVPTXISD::LoadV2: 1076 switch (EltVT.getSimpleVT().SimpleTy) { 1077 default: 1078 return nullptr; 1079 case MVT::i8: 1080 Opcode = NVPTX::LDV_i8_v2_ari_64; 1081 break; 1082 case MVT::i16: 1083 Opcode = NVPTX::LDV_i16_v2_ari_64; 1084 break; 1085 case MVT::i32: 1086 Opcode = NVPTX::LDV_i32_v2_ari_64; 1087 break; 1088 case MVT::i64: 1089 Opcode = NVPTX::LDV_i64_v2_ari_64; 1090 break; 1091 case MVT::f32: 1092 Opcode = NVPTX::LDV_f32_v2_ari_64; 1093 break; 1094 case MVT::f64: 1095 Opcode = NVPTX::LDV_f64_v2_ari_64; 1096 break; 1097 } 1098 break; 1099 case NVPTXISD::LoadV4: 1100 switch (EltVT.getSimpleVT().SimpleTy) { 1101 default: 1102 return nullptr; 1103 case MVT::i8: 1104 Opcode = NVPTX::LDV_i8_v4_ari_64; 1105 break; 1106 case MVT::i16: 1107 Opcode = NVPTX::LDV_i16_v4_ari_64; 1108 break; 1109 case MVT::i32: 1110 Opcode = NVPTX::LDV_i32_v4_ari_64; 1111 break; 1112 case MVT::f32: 1113 Opcode = NVPTX::LDV_f32_v4_ari_64; 1114 break; 1115 } 1116 break; 1117 } 1118 } else { 1119 switch (N->getOpcode()) { 1120 default: 1121 return nullptr; 1122 case NVPTXISD::LoadV2: 1123 switch (EltVT.getSimpleVT().SimpleTy) { 1124 default: 1125 return nullptr; 1126 case MVT::i8: 1127 Opcode = NVPTX::LDV_i8_v2_ari; 1128 break; 1129 case MVT::i16: 1130 Opcode = NVPTX::LDV_i16_v2_ari; 1131 break; 1132 case MVT::i32: 1133 Opcode = NVPTX::LDV_i32_v2_ari; 1134 break; 1135 case MVT::i64: 1136 Opcode = NVPTX::LDV_i64_v2_ari; 1137 break; 1138 case MVT::f32: 1139 Opcode = NVPTX::LDV_f32_v2_ari; 1140 break; 1141 case MVT::f64: 1142 Opcode = NVPTX::LDV_f64_v2_ari; 1143 break; 1144 } 1145 break; 1146 case NVPTXISD::LoadV4: 1147 switch (EltVT.getSimpleVT().SimpleTy) { 1148 default: 1149 return nullptr; 1150 case MVT::i8: 1151 Opcode = NVPTX::LDV_i8_v4_ari; 1152 break; 1153 case MVT::i16: 1154 Opcode = NVPTX::LDV_i16_v4_ari; 1155 break; 1156 case MVT::i32: 1157 Opcode = NVPTX::LDV_i32_v4_ari; 1158 break; 1159 case MVT::f32: 1160 Opcode = NVPTX::LDV_f32_v4_ari; 1161 break; 1162 } 1163 break; 1164 } 1165 } 1166 1167 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1168 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1169 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; 1170 1171 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1172 } else { 1173 if (TM.is64Bit()) { 1174 switch (N->getOpcode()) { 1175 default: 1176 return nullptr; 1177 case NVPTXISD::LoadV2: 1178 switch (EltVT.getSimpleVT().SimpleTy) { 1179 default: 1180 return nullptr; 1181 case MVT::i8: 1182 Opcode = NVPTX::LDV_i8_v2_areg_64; 1183 break; 1184 case MVT::i16: 1185 Opcode = NVPTX::LDV_i16_v2_areg_64; 1186 break; 1187 case MVT::i32: 1188 Opcode = NVPTX::LDV_i32_v2_areg_64; 1189 break; 1190 case MVT::i64: 1191 Opcode = NVPTX::LDV_i64_v2_areg_64; 1192 break; 1193 case MVT::f32: 1194 Opcode = NVPTX::LDV_f32_v2_areg_64; 1195 break; 1196 case MVT::f64: 1197 Opcode = NVPTX::LDV_f64_v2_areg_64; 1198 break; 1199 } 1200 break; 1201 case NVPTXISD::LoadV4: 1202 switch (EltVT.getSimpleVT().SimpleTy) { 1203 default: 1204 return nullptr; 1205 case MVT::i8: 1206 Opcode = NVPTX::LDV_i8_v4_areg_64; 1207 break; 1208 case MVT::i16: 1209 Opcode = NVPTX::LDV_i16_v4_areg_64; 1210 break; 1211 case MVT::i32: 1212 Opcode = NVPTX::LDV_i32_v4_areg_64; 1213 break; 1214 case MVT::f32: 1215 Opcode = NVPTX::LDV_f32_v4_areg_64; 1216 break; 1217 } 1218 break; 1219 } 1220 } else { 1221 switch (N->getOpcode()) { 1222 default: 1223 return nullptr; 1224 case NVPTXISD::LoadV2: 1225 switch (EltVT.getSimpleVT().SimpleTy) { 1226 default: 1227 return nullptr; 1228 case MVT::i8: 1229 Opcode = NVPTX::LDV_i8_v2_areg; 1230 break; 1231 case MVT::i16: 1232 Opcode = NVPTX::LDV_i16_v2_areg; 1233 break; 1234 case MVT::i32: 1235 Opcode = NVPTX::LDV_i32_v2_areg; 1236 break; 1237 case MVT::i64: 1238 Opcode = NVPTX::LDV_i64_v2_areg; 1239 break; 1240 case MVT::f32: 1241 Opcode = NVPTX::LDV_f32_v2_areg; 1242 break; 1243 case MVT::f64: 1244 Opcode = NVPTX::LDV_f64_v2_areg; 1245 break; 1246 } 1247 break; 1248 case NVPTXISD::LoadV4: 1249 switch (EltVT.getSimpleVT().SimpleTy) { 1250 default: 1251 return nullptr; 1252 case MVT::i8: 1253 Opcode = NVPTX::LDV_i8_v4_areg; 1254 break; 1255 case MVT::i16: 1256 Opcode = NVPTX::LDV_i16_v4_areg; 1257 break; 1258 case MVT::i32: 1259 Opcode = NVPTX::LDV_i32_v4_areg; 1260 break; 1261 case MVT::f32: 1262 Opcode = NVPTX::LDV_f32_v4_areg; 1263 break; 1264 } 1265 break; 1266 } 1267 } 1268 1269 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1270 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1271 getI32Imm(FromTypeWidth, DL), Op1, Chain }; 1272 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1273 } 1274 1275 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 1276 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 1277 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 1278 1279 return LD; 1280} 1281 1282SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { 1283 1284 SDValue Chain = N->getOperand(0); 1285 SDValue Op1; 1286 MemSDNode *Mem; 1287 bool IsLDG = true; 1288 1289 // If this is an LDG intrinsic, the address is the third operand. Its its an 1290 // LDG/LDU SD node (from custom vector handling), then its the second operand 1291 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 1292 Op1 = N->getOperand(2); 1293 Mem = cast<MemIntrinsicSDNode>(N); 1294 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 1295 switch (IID) { 1296 default: 1297 return NULL; 1298 case Intrinsic::nvvm_ldg_global_f: 1299 case Intrinsic::nvvm_ldg_global_i: 1300 case Intrinsic::nvvm_ldg_global_p: 1301 IsLDG = true; 1302 break; 1303 case Intrinsic::nvvm_ldu_global_f: 1304 case Intrinsic::nvvm_ldu_global_i: 1305 case Intrinsic::nvvm_ldu_global_p: 1306 IsLDG = false; 1307 break; 1308 } 1309 } else { 1310 Op1 = N->getOperand(1); 1311 Mem = cast<MemSDNode>(N); 1312 } 1313 1314 unsigned Opcode; 1315 SDLoc DL(N); 1316 SDNode *LD; 1317 SDValue Base, Offset, Addr; 1318 1319 EVT EltVT = Mem->getMemoryVT(); 1320 if (EltVT.isVector()) { 1321 EltVT = EltVT.getVectorElementType(); 1322 } 1323 1324 if (SelectDirectAddr(Op1, Addr)) { 1325 switch (N->getOpcode()) { 1326 default: 1327 return nullptr; 1328 case ISD::INTRINSIC_W_CHAIN: 1329 if (IsLDG) { 1330 switch (EltVT.getSimpleVT().SimpleTy) { 1331 default: 1332 return nullptr; 1333 case MVT::i8: 1334 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar; 1335 break; 1336 case MVT::i16: 1337 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar; 1338 break; 1339 case MVT::i32: 1340 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar; 1341 break; 1342 case MVT::i64: 1343 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar; 1344 break; 1345 case MVT::f32: 1346 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar; 1347 break; 1348 case MVT::f64: 1349 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar; 1350 break; 1351 } 1352 } else { 1353 switch (EltVT.getSimpleVT().SimpleTy) { 1354 default: 1355 return nullptr; 1356 case MVT::i8: 1357 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar; 1358 break; 1359 case MVT::i16: 1360 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar; 1361 break; 1362 case MVT::i32: 1363 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar; 1364 break; 1365 case MVT::i64: 1366 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar; 1367 break; 1368 case MVT::f32: 1369 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar; 1370 break; 1371 case MVT::f64: 1372 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar; 1373 break; 1374 } 1375 } 1376 break; 1377 case NVPTXISD::LDGV2: 1378 switch (EltVT.getSimpleVT().SimpleTy) { 1379 default: 1380 return nullptr; 1381 case MVT::i8: 1382 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; 1383 break; 1384 case MVT::i16: 1385 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar; 1386 break; 1387 case MVT::i32: 1388 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar; 1389 break; 1390 case MVT::i64: 1391 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar; 1392 break; 1393 case MVT::f32: 1394 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar; 1395 break; 1396 case MVT::f64: 1397 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar; 1398 break; 1399 } 1400 break; 1401 case NVPTXISD::LDUV2: 1402 switch (EltVT.getSimpleVT().SimpleTy) { 1403 default: 1404 return nullptr; 1405 case MVT::i8: 1406 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; 1407 break; 1408 case MVT::i16: 1409 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar; 1410 break; 1411 case MVT::i32: 1412 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar; 1413 break; 1414 case MVT::i64: 1415 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar; 1416 break; 1417 case MVT::f32: 1418 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar; 1419 break; 1420 case MVT::f64: 1421 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar; 1422 break; 1423 } 1424 break; 1425 case NVPTXISD::LDGV4: 1426 switch (EltVT.getSimpleVT().SimpleTy) { 1427 default: 1428 return nullptr; 1429 case MVT::i8: 1430 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; 1431 break; 1432 case MVT::i16: 1433 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar; 1434 break; 1435 case MVT::i32: 1436 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar; 1437 break; 1438 case MVT::f32: 1439 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar; 1440 break; 1441 } 1442 break; 1443 case NVPTXISD::LDUV4: 1444 switch (EltVT.getSimpleVT().SimpleTy) { 1445 default: 1446 return nullptr; 1447 case MVT::i8: 1448 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; 1449 break; 1450 case MVT::i16: 1451 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar; 1452 break; 1453 case MVT::i32: 1454 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar; 1455 break; 1456 case MVT::f32: 1457 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar; 1458 break; 1459 } 1460 break; 1461 } 1462 1463 SDValue Ops[] = { Addr, Chain }; 1464 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1465 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1466 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1467 if (TM.is64Bit()) { 1468 switch (N->getOpcode()) { 1469 default: 1470 return nullptr; 1471 case ISD::LOAD: 1472 case ISD::INTRINSIC_W_CHAIN: 1473 if (IsLDG) { 1474 switch (EltVT.getSimpleVT().SimpleTy) { 1475 default: 1476 return nullptr; 1477 case MVT::i8: 1478 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64; 1479 break; 1480 case MVT::i16: 1481 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64; 1482 break; 1483 case MVT::i32: 1484 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64; 1485 break; 1486 case MVT::i64: 1487 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64; 1488 break; 1489 case MVT::f32: 1490 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64; 1491 break; 1492 case MVT::f64: 1493 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64; 1494 break; 1495 } 1496 } else { 1497 switch (EltVT.getSimpleVT().SimpleTy) { 1498 default: 1499 return nullptr; 1500 case MVT::i8: 1501 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64; 1502 break; 1503 case MVT::i16: 1504 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64; 1505 break; 1506 case MVT::i32: 1507 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64; 1508 break; 1509 case MVT::i64: 1510 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64; 1511 break; 1512 case MVT::f32: 1513 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64; 1514 break; 1515 case MVT::f64: 1516 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64; 1517 break; 1518 } 1519 } 1520 break; 1521 case NVPTXISD::LoadV2: 1522 case NVPTXISD::LDGV2: 1523 switch (EltVT.getSimpleVT().SimpleTy) { 1524 default: 1525 return nullptr; 1526 case MVT::i8: 1527 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; 1528 break; 1529 case MVT::i16: 1530 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64; 1531 break; 1532 case MVT::i32: 1533 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64; 1534 break; 1535 case MVT::i64: 1536 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64; 1537 break; 1538 case MVT::f32: 1539 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64; 1540 break; 1541 case MVT::f64: 1542 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64; 1543 break; 1544 } 1545 break; 1546 case NVPTXISD::LDUV2: 1547 switch (EltVT.getSimpleVT().SimpleTy) { 1548 default: 1549 return nullptr; 1550 case MVT::i8: 1551 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; 1552 break; 1553 case MVT::i16: 1554 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64; 1555 break; 1556 case MVT::i32: 1557 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64; 1558 break; 1559 case MVT::i64: 1560 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64; 1561 break; 1562 case MVT::f32: 1563 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64; 1564 break; 1565 case MVT::f64: 1566 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64; 1567 break; 1568 } 1569 break; 1570 case NVPTXISD::LoadV4: 1571 case NVPTXISD::LDGV4: 1572 switch (EltVT.getSimpleVT().SimpleTy) { 1573 default: 1574 return nullptr; 1575 case MVT::i8: 1576 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; 1577 break; 1578 case MVT::i16: 1579 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64; 1580 break; 1581 case MVT::i32: 1582 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64; 1583 break; 1584 case MVT::f32: 1585 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64; 1586 break; 1587 } 1588 break; 1589 case NVPTXISD::LDUV4: 1590 switch (EltVT.getSimpleVT().SimpleTy) { 1591 default: 1592 return nullptr; 1593 case MVT::i8: 1594 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; 1595 break; 1596 case MVT::i16: 1597 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64; 1598 break; 1599 case MVT::i32: 1600 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64; 1601 break; 1602 case MVT::f32: 1603 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64; 1604 break; 1605 } 1606 break; 1607 } 1608 } else { 1609 switch (N->getOpcode()) { 1610 default: 1611 return nullptr; 1612 case ISD::LOAD: 1613 case ISD::INTRINSIC_W_CHAIN: 1614 if (IsLDG) { 1615 switch (EltVT.getSimpleVT().SimpleTy) { 1616 default: 1617 return nullptr; 1618 case MVT::i8: 1619 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari; 1620 break; 1621 case MVT::i16: 1622 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari; 1623 break; 1624 case MVT::i32: 1625 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari; 1626 break; 1627 case MVT::i64: 1628 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari; 1629 break; 1630 case MVT::f32: 1631 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari; 1632 break; 1633 case MVT::f64: 1634 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari; 1635 break; 1636 } 1637 } else { 1638 switch (EltVT.getSimpleVT().SimpleTy) { 1639 default: 1640 return nullptr; 1641 case MVT::i8: 1642 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari; 1643 break; 1644 case MVT::i16: 1645 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari; 1646 break; 1647 case MVT::i32: 1648 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari; 1649 break; 1650 case MVT::i64: 1651 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari; 1652 break; 1653 case MVT::f32: 1654 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari; 1655 break; 1656 case MVT::f64: 1657 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari; 1658 break; 1659 } 1660 } 1661 break; 1662 case NVPTXISD::LoadV2: 1663 case NVPTXISD::LDGV2: 1664 switch (EltVT.getSimpleVT().SimpleTy) { 1665 default: 1666 return nullptr; 1667 case MVT::i8: 1668 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; 1669 break; 1670 case MVT::i16: 1671 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32; 1672 break; 1673 case MVT::i32: 1674 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32; 1675 break; 1676 case MVT::i64: 1677 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32; 1678 break; 1679 case MVT::f32: 1680 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32; 1681 break; 1682 case MVT::f64: 1683 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32; 1684 break; 1685 } 1686 break; 1687 case NVPTXISD::LDUV2: 1688 switch (EltVT.getSimpleVT().SimpleTy) { 1689 default: 1690 return nullptr; 1691 case MVT::i8: 1692 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; 1693 break; 1694 case MVT::i16: 1695 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32; 1696 break; 1697 case MVT::i32: 1698 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32; 1699 break; 1700 case MVT::i64: 1701 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32; 1702 break; 1703 case MVT::f32: 1704 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32; 1705 break; 1706 case MVT::f64: 1707 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32; 1708 break; 1709 } 1710 break; 1711 case NVPTXISD::LoadV4: 1712 case NVPTXISD::LDGV4: 1713 switch (EltVT.getSimpleVT().SimpleTy) { 1714 default: 1715 return nullptr; 1716 case MVT::i8: 1717 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; 1718 break; 1719 case MVT::i16: 1720 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32; 1721 break; 1722 case MVT::i32: 1723 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32; 1724 break; 1725 case MVT::f32: 1726 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32; 1727 break; 1728 } 1729 break; 1730 case NVPTXISD::LDUV4: 1731 switch (EltVT.getSimpleVT().SimpleTy) { 1732 default: 1733 return nullptr; 1734 case MVT::i8: 1735 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; 1736 break; 1737 case MVT::i16: 1738 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32; 1739 break; 1740 case MVT::i32: 1741 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32; 1742 break; 1743 case MVT::f32: 1744 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32; 1745 break; 1746 } 1747 break; 1748 } 1749 } 1750 1751 SDValue Ops[] = { Base, Offset, Chain }; 1752 1753 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1754 } else { 1755 if (TM.is64Bit()) { 1756 switch (N->getOpcode()) { 1757 default: 1758 return nullptr; 1759 case ISD::LOAD: 1760 case ISD::INTRINSIC_W_CHAIN: 1761 if (IsLDG) { 1762 switch (EltVT.getSimpleVT().SimpleTy) { 1763 default: 1764 return nullptr; 1765 case MVT::i8: 1766 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64; 1767 break; 1768 case MVT::i16: 1769 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64; 1770 break; 1771 case MVT::i32: 1772 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64; 1773 break; 1774 case MVT::i64: 1775 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64; 1776 break; 1777 case MVT::f32: 1778 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64; 1779 break; 1780 case MVT::f64: 1781 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64; 1782 break; 1783 } 1784 } else { 1785 switch (EltVT.getSimpleVT().SimpleTy) { 1786 default: 1787 return nullptr; 1788 case MVT::i8: 1789 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64; 1790 break; 1791 case MVT::i16: 1792 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64; 1793 break; 1794 case MVT::i32: 1795 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64; 1796 break; 1797 case MVT::i64: 1798 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64; 1799 break; 1800 case MVT::f32: 1801 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64; 1802 break; 1803 case MVT::f64: 1804 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64; 1805 break; 1806 } 1807 } 1808 break; 1809 case NVPTXISD::LoadV2: 1810 case NVPTXISD::LDGV2: 1811 switch (EltVT.getSimpleVT().SimpleTy) { 1812 default: 1813 return nullptr; 1814 case MVT::i8: 1815 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; 1816 break; 1817 case MVT::i16: 1818 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64; 1819 break; 1820 case MVT::i32: 1821 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64; 1822 break; 1823 case MVT::i64: 1824 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64; 1825 break; 1826 case MVT::f32: 1827 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64; 1828 break; 1829 case MVT::f64: 1830 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64; 1831 break; 1832 } 1833 break; 1834 case NVPTXISD::LDUV2: 1835 switch (EltVT.getSimpleVT().SimpleTy) { 1836 default: 1837 return nullptr; 1838 case MVT::i8: 1839 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; 1840 break; 1841 case MVT::i16: 1842 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64; 1843 break; 1844 case MVT::i32: 1845 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64; 1846 break; 1847 case MVT::i64: 1848 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64; 1849 break; 1850 case MVT::f32: 1851 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64; 1852 break; 1853 case MVT::f64: 1854 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64; 1855 break; 1856 } 1857 break; 1858 case NVPTXISD::LoadV4: 1859 case NVPTXISD::LDGV4: 1860 switch (EltVT.getSimpleVT().SimpleTy) { 1861 default: 1862 return nullptr; 1863 case MVT::i8: 1864 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; 1865 break; 1866 case MVT::i16: 1867 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64; 1868 break; 1869 case MVT::i32: 1870 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64; 1871 break; 1872 case MVT::f32: 1873 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64; 1874 break; 1875 } 1876 break; 1877 case NVPTXISD::LDUV4: 1878 switch (EltVT.getSimpleVT().SimpleTy) { 1879 default: 1880 return nullptr; 1881 case MVT::i8: 1882 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; 1883 break; 1884 case MVT::i16: 1885 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64; 1886 break; 1887 case MVT::i32: 1888 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64; 1889 break; 1890 case MVT::f32: 1891 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64; 1892 break; 1893 } 1894 break; 1895 } 1896 } else { 1897 switch (N->getOpcode()) { 1898 default: 1899 return nullptr; 1900 case ISD::LOAD: 1901 case ISD::INTRINSIC_W_CHAIN: 1902 if (IsLDG) { 1903 switch (EltVT.getSimpleVT().SimpleTy) { 1904 default: 1905 return nullptr; 1906 case MVT::i8: 1907 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg; 1908 break; 1909 case MVT::i16: 1910 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg; 1911 break; 1912 case MVT::i32: 1913 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg; 1914 break; 1915 case MVT::i64: 1916 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg; 1917 break; 1918 case MVT::f32: 1919 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg; 1920 break; 1921 case MVT::f64: 1922 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg; 1923 break; 1924 } 1925 } else { 1926 switch (EltVT.getSimpleVT().SimpleTy) { 1927 default: 1928 return nullptr; 1929 case MVT::i8: 1930 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg; 1931 break; 1932 case MVT::i16: 1933 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg; 1934 break; 1935 case MVT::i32: 1936 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg; 1937 break; 1938 case MVT::i64: 1939 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg; 1940 break; 1941 case MVT::f32: 1942 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg; 1943 break; 1944 case MVT::f64: 1945 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg; 1946 break; 1947 } 1948 } 1949 break; 1950 case NVPTXISD::LoadV2: 1951 case NVPTXISD::LDGV2: 1952 switch (EltVT.getSimpleVT().SimpleTy) { 1953 default: 1954 return nullptr; 1955 case MVT::i8: 1956 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; 1957 break; 1958 case MVT::i16: 1959 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32; 1960 break; 1961 case MVT::i32: 1962 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32; 1963 break; 1964 case MVT::i64: 1965 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32; 1966 break; 1967 case MVT::f32: 1968 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32; 1969 break; 1970 case MVT::f64: 1971 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32; 1972 break; 1973 } 1974 break; 1975 case NVPTXISD::LDUV2: 1976 switch (EltVT.getSimpleVT().SimpleTy) { 1977 default: 1978 return nullptr; 1979 case MVT::i8: 1980 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; 1981 break; 1982 case MVT::i16: 1983 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32; 1984 break; 1985 case MVT::i32: 1986 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32; 1987 break; 1988 case MVT::i64: 1989 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32; 1990 break; 1991 case MVT::f32: 1992 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32; 1993 break; 1994 case MVT::f64: 1995 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32; 1996 break; 1997 } 1998 break; 1999 case NVPTXISD::LoadV4: 2000 case NVPTXISD::LDGV4: 2001 switch (EltVT.getSimpleVT().SimpleTy) { 2002 default: 2003 return nullptr; 2004 case MVT::i8: 2005 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; 2006 break; 2007 case MVT::i16: 2008 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32; 2009 break; 2010 case MVT::i32: 2011 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32; 2012 break; 2013 case MVT::f32: 2014 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32; 2015 break; 2016 } 2017 break; 2018 case NVPTXISD::LDUV4: 2019 switch (EltVT.getSimpleVT().SimpleTy) { 2020 default: 2021 return nullptr; 2022 case MVT::i8: 2023 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; 2024 break; 2025 case MVT::i16: 2026 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32; 2027 break; 2028 case MVT::i32: 2029 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32; 2030 break; 2031 case MVT::f32: 2032 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32; 2033 break; 2034 } 2035 break; 2036 } 2037 } 2038 2039 SDValue Ops[] = { Op1, Chain }; 2040 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 2041 } 2042 2043 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2044 MemRefs0[0] = Mem->getMemOperand(); 2045 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 2046 2047 return LD; 2048} 2049 2050SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { 2051 SDLoc dl(N); 2052 StoreSDNode *ST = cast<StoreSDNode>(N); 2053 EVT StoreVT = ST->getMemoryVT(); 2054 SDNode *NVPTXST = nullptr; 2055 2056 // do not support pre/post inc/dec 2057 if (ST->isIndexed()) 2058 return nullptr; 2059 2060 if (!StoreVT.isSimple()) 2061 return nullptr; 2062 2063 // Address Space Setting 2064 unsigned int codeAddrSpace = getCodeAddrSpace(ST); 2065 2066 // Volatile Setting 2067 // - .volatile is only availalble for .global and .shared 2068 bool isVolatile = ST->isVolatile(); 2069 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 2070 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 2071 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 2072 isVolatile = false; 2073 2074 // Vector Setting 2075 MVT SimpleVT = StoreVT.getSimpleVT(); 2076 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 2077 if (SimpleVT.isVector()) { 2078 unsigned num = SimpleVT.getVectorNumElements(); 2079 if (num == 2) 2080 vecType = NVPTX::PTXLdStInstCode::V2; 2081 else if (num == 4) 2082 vecType = NVPTX::PTXLdStInstCode::V4; 2083 else 2084 return nullptr; 2085 } 2086 2087 // Type Setting: toType + toTypeWidth 2088 // - for integer type, always use 'u' 2089 // 2090 MVT ScalarVT = SimpleVT.getScalarType(); 2091 unsigned toTypeWidth = ScalarVT.getSizeInBits(); 2092 unsigned int toType; 2093 if (ScalarVT.isFloatingPoint()) 2094 toType = NVPTX::PTXLdStInstCode::Float; 2095 else 2096 toType = NVPTX::PTXLdStInstCode::Unsigned; 2097 2098 // Create the machine instruction DAG 2099 SDValue Chain = N->getOperand(0); 2100 SDValue N1 = N->getOperand(1); 2101 SDValue N2 = N->getOperand(2); 2102 SDValue Addr; 2103 SDValue Offset, Base; 2104 unsigned Opcode; 2105 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy; 2106 2107 if (SelectDirectAddr(N2, Addr)) { 2108 switch (SourceVT) { 2109 case MVT::i8: 2110 Opcode = NVPTX::ST_i8_avar; 2111 break; 2112 case MVT::i16: 2113 Opcode = NVPTX::ST_i16_avar; 2114 break; 2115 case MVT::i32: 2116 Opcode = NVPTX::ST_i32_avar; 2117 break; 2118 case MVT::i64: 2119 Opcode = NVPTX::ST_i64_avar; 2120 break; 2121 case MVT::f32: 2122 Opcode = NVPTX::ST_f32_avar; 2123 break; 2124 case MVT::f64: 2125 Opcode = NVPTX::ST_f64_avar; 2126 break; 2127 default: 2128 return nullptr; 2129 } 2130 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2131 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2132 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr, 2133 Chain }; 2134 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2135 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) 2136 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { 2137 switch (SourceVT) { 2138 case MVT::i8: 2139 Opcode = NVPTX::ST_i8_asi; 2140 break; 2141 case MVT::i16: 2142 Opcode = NVPTX::ST_i16_asi; 2143 break; 2144 case MVT::i32: 2145 Opcode = NVPTX::ST_i32_asi; 2146 break; 2147 case MVT::i64: 2148 Opcode = NVPTX::ST_i64_asi; 2149 break; 2150 case MVT::f32: 2151 Opcode = NVPTX::ST_f32_asi; 2152 break; 2153 case MVT::f64: 2154 Opcode = NVPTX::ST_f64_asi; 2155 break; 2156 default: 2157 return nullptr; 2158 } 2159 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2160 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2161 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, 2162 Offset, Chain }; 2163 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2164 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) 2165 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 2166 if (TM.is64Bit()) { 2167 switch (SourceVT) { 2168 case MVT::i8: 2169 Opcode = NVPTX::ST_i8_ari_64; 2170 break; 2171 case MVT::i16: 2172 Opcode = NVPTX::ST_i16_ari_64; 2173 break; 2174 case MVT::i32: 2175 Opcode = NVPTX::ST_i32_ari_64; 2176 break; 2177 case MVT::i64: 2178 Opcode = NVPTX::ST_i64_ari_64; 2179 break; 2180 case MVT::f32: 2181 Opcode = NVPTX::ST_f32_ari_64; 2182 break; 2183 case MVT::f64: 2184 Opcode = NVPTX::ST_f64_ari_64; 2185 break; 2186 default: 2187 return nullptr; 2188 } 2189 } else { 2190 switch (SourceVT) { 2191 case MVT::i8: 2192 Opcode = NVPTX::ST_i8_ari; 2193 break; 2194 case MVT::i16: 2195 Opcode = NVPTX::ST_i16_ari; 2196 break; 2197 case MVT::i32: 2198 Opcode = NVPTX::ST_i32_ari; 2199 break; 2200 case MVT::i64: 2201 Opcode = NVPTX::ST_i64_ari; 2202 break; 2203 case MVT::f32: 2204 Opcode = NVPTX::ST_f32_ari; 2205 break; 2206 case MVT::f64: 2207 Opcode = NVPTX::ST_f64_ari; 2208 break; 2209 default: 2210 return nullptr; 2211 } 2212 } 2213 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2214 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2215 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, 2216 Offset, Chain }; 2217 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2218 } else { 2219 if (TM.is64Bit()) { 2220 switch (SourceVT) { 2221 case MVT::i8: 2222 Opcode = NVPTX::ST_i8_areg_64; 2223 break; 2224 case MVT::i16: 2225 Opcode = NVPTX::ST_i16_areg_64; 2226 break; 2227 case MVT::i32: 2228 Opcode = NVPTX::ST_i32_areg_64; 2229 break; 2230 case MVT::i64: 2231 Opcode = NVPTX::ST_i64_areg_64; 2232 break; 2233 case MVT::f32: 2234 Opcode = NVPTX::ST_f32_areg_64; 2235 break; 2236 case MVT::f64: 2237 Opcode = NVPTX::ST_f64_areg_64; 2238 break; 2239 default: 2240 return nullptr; 2241 } 2242 } else { 2243 switch (SourceVT) { 2244 case MVT::i8: 2245 Opcode = NVPTX::ST_i8_areg; 2246 break; 2247 case MVT::i16: 2248 Opcode = NVPTX::ST_i16_areg; 2249 break; 2250 case MVT::i32: 2251 Opcode = NVPTX::ST_i32_areg; 2252 break; 2253 case MVT::i64: 2254 Opcode = NVPTX::ST_i64_areg; 2255 break; 2256 case MVT::f32: 2257 Opcode = NVPTX::ST_f32_areg; 2258 break; 2259 case MVT::f64: 2260 Opcode = NVPTX::ST_f64_areg; 2261 break; 2262 default: 2263 return nullptr; 2264 } 2265 } 2266 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2267 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2268 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2, 2269 Chain }; 2270 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2271 } 2272 2273 if (NVPTXST) { 2274 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2275 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2276 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); 2277 } 2278 2279 return NVPTXST; 2280} 2281 2282SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { 2283 SDValue Chain = N->getOperand(0); 2284 SDValue Op1 = N->getOperand(1); 2285 SDValue Addr, Offset, Base; 2286 unsigned Opcode; 2287 SDLoc DL(N); 2288 SDNode *ST; 2289 EVT EltVT = Op1.getValueType(); 2290 MemSDNode *MemSD = cast<MemSDNode>(N); 2291 EVT StoreVT = MemSD->getMemoryVT(); 2292 2293 // Address Space Setting 2294 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); 2295 2296 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { 2297 report_fatal_error("Cannot store to pointer that points to constant " 2298 "memory space"); 2299 } 2300 2301 // Volatile Setting 2302 // - .volatile is only availalble for .global and .shared 2303 bool IsVolatile = MemSD->isVolatile(); 2304 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 2305 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 2306 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 2307 IsVolatile = false; 2308 2309 // Type Setting: toType + toTypeWidth 2310 // - for integer type, always use 'u' 2311 assert(StoreVT.isSimple() && "Store value is not simple"); 2312 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); 2313 unsigned ToTypeWidth = ScalarVT.getSizeInBits(); 2314 unsigned ToType; 2315 if (ScalarVT.isFloatingPoint()) 2316 ToType = NVPTX::PTXLdStInstCode::Float; 2317 else 2318 ToType = NVPTX::PTXLdStInstCode::Unsigned; 2319 2320 SmallVector<SDValue, 12> StOps; 2321 SDValue N2; 2322 unsigned VecType; 2323 2324 switch (N->getOpcode()) { 2325 case NVPTXISD::StoreV2: 2326 VecType = NVPTX::PTXLdStInstCode::V2; 2327 StOps.push_back(N->getOperand(1)); 2328 StOps.push_back(N->getOperand(2)); 2329 N2 = N->getOperand(3); 2330 break; 2331 case NVPTXISD::StoreV4: 2332 VecType = NVPTX::PTXLdStInstCode::V4; 2333 StOps.push_back(N->getOperand(1)); 2334 StOps.push_back(N->getOperand(2)); 2335 StOps.push_back(N->getOperand(3)); 2336 StOps.push_back(N->getOperand(4)); 2337 N2 = N->getOperand(5); 2338 break; 2339 default: 2340 return nullptr; 2341 } 2342 2343 StOps.push_back(getI32Imm(IsVolatile, DL)); 2344 StOps.push_back(getI32Imm(CodeAddrSpace, DL)); 2345 StOps.push_back(getI32Imm(VecType, DL)); 2346 StOps.push_back(getI32Imm(ToType, DL)); 2347 StOps.push_back(getI32Imm(ToTypeWidth, DL)); 2348 2349 if (SelectDirectAddr(N2, Addr)) { 2350 switch (N->getOpcode()) { 2351 default: 2352 return nullptr; 2353 case NVPTXISD::StoreV2: 2354 switch (EltVT.getSimpleVT().SimpleTy) { 2355 default: 2356 return nullptr; 2357 case MVT::i8: 2358 Opcode = NVPTX::STV_i8_v2_avar; 2359 break; 2360 case MVT::i16: 2361 Opcode = NVPTX::STV_i16_v2_avar; 2362 break; 2363 case MVT::i32: 2364 Opcode = NVPTX::STV_i32_v2_avar; 2365 break; 2366 case MVT::i64: 2367 Opcode = NVPTX::STV_i64_v2_avar; 2368 break; 2369 case MVT::f32: 2370 Opcode = NVPTX::STV_f32_v2_avar; 2371 break; 2372 case MVT::f64: 2373 Opcode = NVPTX::STV_f64_v2_avar; 2374 break; 2375 } 2376 break; 2377 case NVPTXISD::StoreV4: 2378 switch (EltVT.getSimpleVT().SimpleTy) { 2379 default: 2380 return nullptr; 2381 case MVT::i8: 2382 Opcode = NVPTX::STV_i8_v4_avar; 2383 break; 2384 case MVT::i16: 2385 Opcode = NVPTX::STV_i16_v4_avar; 2386 break; 2387 case MVT::i32: 2388 Opcode = NVPTX::STV_i32_v4_avar; 2389 break; 2390 case MVT::f32: 2391 Opcode = NVPTX::STV_f32_v4_avar; 2392 break; 2393 } 2394 break; 2395 } 2396 StOps.push_back(Addr); 2397 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) 2398 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { 2399 switch (N->getOpcode()) { 2400 default: 2401 return nullptr; 2402 case NVPTXISD::StoreV2: 2403 switch (EltVT.getSimpleVT().SimpleTy) { 2404 default: 2405 return nullptr; 2406 case MVT::i8: 2407 Opcode = NVPTX::STV_i8_v2_asi; 2408 break; 2409 case MVT::i16: 2410 Opcode = NVPTX::STV_i16_v2_asi; 2411 break; 2412 case MVT::i32: 2413 Opcode = NVPTX::STV_i32_v2_asi; 2414 break; 2415 case MVT::i64: 2416 Opcode = NVPTX::STV_i64_v2_asi; 2417 break; 2418 case MVT::f32: 2419 Opcode = NVPTX::STV_f32_v2_asi; 2420 break; 2421 case MVT::f64: 2422 Opcode = NVPTX::STV_f64_v2_asi; 2423 break; 2424 } 2425 break; 2426 case NVPTXISD::StoreV4: 2427 switch (EltVT.getSimpleVT().SimpleTy) { 2428 default: 2429 return nullptr; 2430 case MVT::i8: 2431 Opcode = NVPTX::STV_i8_v4_asi; 2432 break; 2433 case MVT::i16: 2434 Opcode = NVPTX::STV_i16_v4_asi; 2435 break; 2436 case MVT::i32: 2437 Opcode = NVPTX::STV_i32_v4_asi; 2438 break; 2439 case MVT::f32: 2440 Opcode = NVPTX::STV_f32_v4_asi; 2441 break; 2442 } 2443 break; 2444 } 2445 StOps.push_back(Base); 2446 StOps.push_back(Offset); 2447 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) 2448 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 2449 if (TM.is64Bit()) { 2450 switch (N->getOpcode()) { 2451 default: 2452 return nullptr; 2453 case NVPTXISD::StoreV2: 2454 switch (EltVT.getSimpleVT().SimpleTy) { 2455 default: 2456 return nullptr; 2457 case MVT::i8: 2458 Opcode = NVPTX::STV_i8_v2_ari_64; 2459 break; 2460 case MVT::i16: 2461 Opcode = NVPTX::STV_i16_v2_ari_64; 2462 break; 2463 case MVT::i32: 2464 Opcode = NVPTX::STV_i32_v2_ari_64; 2465 break; 2466 case MVT::i64: 2467 Opcode = NVPTX::STV_i64_v2_ari_64; 2468 break; 2469 case MVT::f32: 2470 Opcode = NVPTX::STV_f32_v2_ari_64; 2471 break; 2472 case MVT::f64: 2473 Opcode = NVPTX::STV_f64_v2_ari_64; 2474 break; 2475 } 2476 break; 2477 case NVPTXISD::StoreV4: 2478 switch (EltVT.getSimpleVT().SimpleTy) { 2479 default: 2480 return nullptr; 2481 case MVT::i8: 2482 Opcode = NVPTX::STV_i8_v4_ari_64; 2483 break; 2484 case MVT::i16: 2485 Opcode = NVPTX::STV_i16_v4_ari_64; 2486 break; 2487 case MVT::i32: 2488 Opcode = NVPTX::STV_i32_v4_ari_64; 2489 break; 2490 case MVT::f32: 2491 Opcode = NVPTX::STV_f32_v4_ari_64; 2492 break; 2493 } 2494 break; 2495 } 2496 } else { 2497 switch (N->getOpcode()) { 2498 default: 2499 return nullptr; 2500 case NVPTXISD::StoreV2: 2501 switch (EltVT.getSimpleVT().SimpleTy) { 2502 default: 2503 return nullptr; 2504 case MVT::i8: 2505 Opcode = NVPTX::STV_i8_v2_ari; 2506 break; 2507 case MVT::i16: 2508 Opcode = NVPTX::STV_i16_v2_ari; 2509 break; 2510 case MVT::i32: 2511 Opcode = NVPTX::STV_i32_v2_ari; 2512 break; 2513 case MVT::i64: 2514 Opcode = NVPTX::STV_i64_v2_ari; 2515 break; 2516 case MVT::f32: 2517 Opcode = NVPTX::STV_f32_v2_ari; 2518 break; 2519 case MVT::f64: 2520 Opcode = NVPTX::STV_f64_v2_ari; 2521 break; 2522 } 2523 break; 2524 case NVPTXISD::StoreV4: 2525 switch (EltVT.getSimpleVT().SimpleTy) { 2526 default: 2527 return nullptr; 2528 case MVT::i8: 2529 Opcode = NVPTX::STV_i8_v4_ari; 2530 break; 2531 case MVT::i16: 2532 Opcode = NVPTX::STV_i16_v4_ari; 2533 break; 2534 case MVT::i32: 2535 Opcode = NVPTX::STV_i32_v4_ari; 2536 break; 2537 case MVT::f32: 2538 Opcode = NVPTX::STV_f32_v4_ari; 2539 break; 2540 } 2541 break; 2542 } 2543 } 2544 StOps.push_back(Base); 2545 StOps.push_back(Offset); 2546 } else { 2547 if (TM.is64Bit()) { 2548 switch (N->getOpcode()) { 2549 default: 2550 return nullptr; 2551 case NVPTXISD::StoreV2: 2552 switch (EltVT.getSimpleVT().SimpleTy) { 2553 default: 2554 return nullptr; 2555 case MVT::i8: 2556 Opcode = NVPTX::STV_i8_v2_areg_64; 2557 break; 2558 case MVT::i16: 2559 Opcode = NVPTX::STV_i16_v2_areg_64; 2560 break; 2561 case MVT::i32: 2562 Opcode = NVPTX::STV_i32_v2_areg_64; 2563 break; 2564 case MVT::i64: 2565 Opcode = NVPTX::STV_i64_v2_areg_64; 2566 break; 2567 case MVT::f32: 2568 Opcode = NVPTX::STV_f32_v2_areg_64; 2569 break; 2570 case MVT::f64: 2571 Opcode = NVPTX::STV_f64_v2_areg_64; 2572 break; 2573 } 2574 break; 2575 case NVPTXISD::StoreV4: 2576 switch (EltVT.getSimpleVT().SimpleTy) { 2577 default: 2578 return nullptr; 2579 case MVT::i8: 2580 Opcode = NVPTX::STV_i8_v4_areg_64; 2581 break; 2582 case MVT::i16: 2583 Opcode = NVPTX::STV_i16_v4_areg_64; 2584 break; 2585 case MVT::i32: 2586 Opcode = NVPTX::STV_i32_v4_areg_64; 2587 break; 2588 case MVT::f32: 2589 Opcode = NVPTX::STV_f32_v4_areg_64; 2590 break; 2591 } 2592 break; 2593 } 2594 } else { 2595 switch (N->getOpcode()) { 2596 default: 2597 return nullptr; 2598 case NVPTXISD::StoreV2: 2599 switch (EltVT.getSimpleVT().SimpleTy) { 2600 default: 2601 return nullptr; 2602 case MVT::i8: 2603 Opcode = NVPTX::STV_i8_v2_areg; 2604 break; 2605 case MVT::i16: 2606 Opcode = NVPTX::STV_i16_v2_areg; 2607 break; 2608 case MVT::i32: 2609 Opcode = NVPTX::STV_i32_v2_areg; 2610 break; 2611 case MVT::i64: 2612 Opcode = NVPTX::STV_i64_v2_areg; 2613 break; 2614 case MVT::f32: 2615 Opcode = NVPTX::STV_f32_v2_areg; 2616 break; 2617 case MVT::f64: 2618 Opcode = NVPTX::STV_f64_v2_areg; 2619 break; 2620 } 2621 break; 2622 case NVPTXISD::StoreV4: 2623 switch (EltVT.getSimpleVT().SimpleTy) { 2624 default: 2625 return nullptr; 2626 case MVT::i8: 2627 Opcode = NVPTX::STV_i8_v4_areg; 2628 break; 2629 case MVT::i16: 2630 Opcode = NVPTX::STV_i16_v4_areg; 2631 break; 2632 case MVT::i32: 2633 Opcode = NVPTX::STV_i32_v4_areg; 2634 break; 2635 case MVT::f32: 2636 Opcode = NVPTX::STV_f32_v4_areg; 2637 break; 2638 } 2639 break; 2640 } 2641 } 2642 StOps.push_back(N2); 2643 } 2644 2645 StOps.push_back(Chain); 2646 2647 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps); 2648 2649 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2650 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2651 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); 2652 2653 return ST; 2654} 2655 2656SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { 2657 SDValue Chain = Node->getOperand(0); 2658 SDValue Offset = Node->getOperand(2); 2659 SDValue Flag = Node->getOperand(3); 2660 SDLoc DL(Node); 2661 MemSDNode *Mem = cast<MemSDNode>(Node); 2662 2663 unsigned VecSize; 2664 switch (Node->getOpcode()) { 2665 default: 2666 return nullptr; 2667 case NVPTXISD::LoadParam: 2668 VecSize = 1; 2669 break; 2670 case NVPTXISD::LoadParamV2: 2671 VecSize = 2; 2672 break; 2673 case NVPTXISD::LoadParamV4: 2674 VecSize = 4; 2675 break; 2676 } 2677 2678 EVT EltVT = Node->getValueType(0); 2679 EVT MemVT = Mem->getMemoryVT(); 2680 2681 unsigned Opc = 0; 2682 2683 switch (VecSize) { 2684 default: 2685 return nullptr; 2686 case 1: 2687 switch (MemVT.getSimpleVT().SimpleTy) { 2688 default: 2689 return nullptr; 2690 case MVT::i1: 2691 Opc = NVPTX::LoadParamMemI8; 2692 break; 2693 case MVT::i8: 2694 Opc = NVPTX::LoadParamMemI8; 2695 break; 2696 case MVT::i16: 2697 Opc = NVPTX::LoadParamMemI16; 2698 break; 2699 case MVT::i32: 2700 Opc = NVPTX::LoadParamMemI32; 2701 break; 2702 case MVT::i64: 2703 Opc = NVPTX::LoadParamMemI64; 2704 break; 2705 case MVT::f32: 2706 Opc = NVPTX::LoadParamMemF32; 2707 break; 2708 case MVT::f64: 2709 Opc = NVPTX::LoadParamMemF64; 2710 break; 2711 } 2712 break; 2713 case 2: 2714 switch (MemVT.getSimpleVT().SimpleTy) { 2715 default: 2716 return nullptr; 2717 case MVT::i1: 2718 Opc = NVPTX::LoadParamMemV2I8; 2719 break; 2720 case MVT::i8: 2721 Opc = NVPTX::LoadParamMemV2I8; 2722 break; 2723 case MVT::i16: 2724 Opc = NVPTX::LoadParamMemV2I16; 2725 break; 2726 case MVT::i32: 2727 Opc = NVPTX::LoadParamMemV2I32; 2728 break; 2729 case MVT::i64: 2730 Opc = NVPTX::LoadParamMemV2I64; 2731 break; 2732 case MVT::f32: 2733 Opc = NVPTX::LoadParamMemV2F32; 2734 break; 2735 case MVT::f64: 2736 Opc = NVPTX::LoadParamMemV2F64; 2737 break; 2738 } 2739 break; 2740 case 4: 2741 switch (MemVT.getSimpleVT().SimpleTy) { 2742 default: 2743 return nullptr; 2744 case MVT::i1: 2745 Opc = NVPTX::LoadParamMemV4I8; 2746 break; 2747 case MVT::i8: 2748 Opc = NVPTX::LoadParamMemV4I8; 2749 break; 2750 case MVT::i16: 2751 Opc = NVPTX::LoadParamMemV4I16; 2752 break; 2753 case MVT::i32: 2754 Opc = NVPTX::LoadParamMemV4I32; 2755 break; 2756 case MVT::f32: 2757 Opc = NVPTX::LoadParamMemV4F32; 2758 break; 2759 } 2760 break; 2761 } 2762 2763 SDVTList VTs; 2764 if (VecSize == 1) { 2765 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); 2766 } else if (VecSize == 2) { 2767 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); 2768 } else { 2769 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; 2770 VTs = CurDAG->getVTList(EVTs); 2771 } 2772 2773 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2774 2775 SmallVector<SDValue, 2> Ops; 2776 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2777 Ops.push_back(Chain); 2778 Ops.push_back(Flag); 2779 2780 return CurDAG->getMachineNode(Opc, DL, VTs, Ops); 2781} 2782 2783SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { 2784 SDLoc DL(N); 2785 SDValue Chain = N->getOperand(0); 2786 SDValue Offset = N->getOperand(1); 2787 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2788 MemSDNode *Mem = cast<MemSDNode>(N); 2789 2790 // How many elements do we have? 2791 unsigned NumElts = 1; 2792 switch (N->getOpcode()) { 2793 default: 2794 return nullptr; 2795 case NVPTXISD::StoreRetval: 2796 NumElts = 1; 2797 break; 2798 case NVPTXISD::StoreRetvalV2: 2799 NumElts = 2; 2800 break; 2801 case NVPTXISD::StoreRetvalV4: 2802 NumElts = 4; 2803 break; 2804 } 2805 2806 // Build vector of operands 2807 SmallVector<SDValue, 6> Ops; 2808 for (unsigned i = 0; i < NumElts; ++i) 2809 Ops.push_back(N->getOperand(i + 2)); 2810 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2811 Ops.push_back(Chain); 2812 2813 // Determine target opcode 2814 // If we have an i1, use an 8-bit store. The lowering code in 2815 // NVPTXISelLowering will have already emitted an upcast. 2816 unsigned Opcode = 0; 2817 switch (NumElts) { 2818 default: 2819 return nullptr; 2820 case 1: 2821 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2822 default: 2823 return nullptr; 2824 case MVT::i1: 2825 Opcode = NVPTX::StoreRetvalI8; 2826 break; 2827 case MVT::i8: 2828 Opcode = NVPTX::StoreRetvalI8; 2829 break; 2830 case MVT::i16: 2831 Opcode = NVPTX::StoreRetvalI16; 2832 break; 2833 case MVT::i32: 2834 Opcode = NVPTX::StoreRetvalI32; 2835 break; 2836 case MVT::i64: 2837 Opcode = NVPTX::StoreRetvalI64; 2838 break; 2839 case MVT::f32: 2840 Opcode = NVPTX::StoreRetvalF32; 2841 break; 2842 case MVT::f64: 2843 Opcode = NVPTX::StoreRetvalF64; 2844 break; 2845 } 2846 break; 2847 case 2: 2848 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2849 default: 2850 return nullptr; 2851 case MVT::i1: 2852 Opcode = NVPTX::StoreRetvalV2I8; 2853 break; 2854 case MVT::i8: 2855 Opcode = NVPTX::StoreRetvalV2I8; 2856 break; 2857 case MVT::i16: 2858 Opcode = NVPTX::StoreRetvalV2I16; 2859 break; 2860 case MVT::i32: 2861 Opcode = NVPTX::StoreRetvalV2I32; 2862 break; 2863 case MVT::i64: 2864 Opcode = NVPTX::StoreRetvalV2I64; 2865 break; 2866 case MVT::f32: 2867 Opcode = NVPTX::StoreRetvalV2F32; 2868 break; 2869 case MVT::f64: 2870 Opcode = NVPTX::StoreRetvalV2F64; 2871 break; 2872 } 2873 break; 2874 case 4: 2875 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2876 default: 2877 return nullptr; 2878 case MVT::i1: 2879 Opcode = NVPTX::StoreRetvalV4I8; 2880 break; 2881 case MVT::i8: 2882 Opcode = NVPTX::StoreRetvalV4I8; 2883 break; 2884 case MVT::i16: 2885 Opcode = NVPTX::StoreRetvalV4I16; 2886 break; 2887 case MVT::i32: 2888 Opcode = NVPTX::StoreRetvalV4I32; 2889 break; 2890 case MVT::f32: 2891 Opcode = NVPTX::StoreRetvalV4F32; 2892 break; 2893 } 2894 break; 2895 } 2896 2897 SDNode *Ret = 2898 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); 2899 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2900 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2901 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); 2902 2903 return Ret; 2904} 2905 2906SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { 2907 SDLoc DL(N); 2908 SDValue Chain = N->getOperand(0); 2909 SDValue Param = N->getOperand(1); 2910 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue(); 2911 SDValue Offset = N->getOperand(2); 2912 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2913 MemSDNode *Mem = cast<MemSDNode>(N); 2914 SDValue Flag = N->getOperand(N->getNumOperands() - 1); 2915 2916 // How many elements do we have? 2917 unsigned NumElts = 1; 2918 switch (N->getOpcode()) { 2919 default: 2920 return nullptr; 2921 case NVPTXISD::StoreParamU32: 2922 case NVPTXISD::StoreParamS32: 2923 case NVPTXISD::StoreParam: 2924 NumElts = 1; 2925 break; 2926 case NVPTXISD::StoreParamV2: 2927 NumElts = 2; 2928 break; 2929 case NVPTXISD::StoreParamV4: 2930 NumElts = 4; 2931 break; 2932 } 2933 2934 // Build vector of operands 2935 SmallVector<SDValue, 8> Ops; 2936 for (unsigned i = 0; i < NumElts; ++i) 2937 Ops.push_back(N->getOperand(i + 3)); 2938 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32)); 2939 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2940 Ops.push_back(Chain); 2941 Ops.push_back(Flag); 2942 2943 // Determine target opcode 2944 // If we have an i1, use an 8-bit store. The lowering code in 2945 // NVPTXISelLowering will have already emitted an upcast. 2946 unsigned Opcode = 0; 2947 switch (N->getOpcode()) { 2948 default: 2949 switch (NumElts) { 2950 default: 2951 return nullptr; 2952 case 1: 2953 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2954 default: 2955 return nullptr; 2956 case MVT::i1: 2957 Opcode = NVPTX::StoreParamI8; 2958 break; 2959 case MVT::i8: 2960 Opcode = NVPTX::StoreParamI8; 2961 break; 2962 case MVT::i16: 2963 Opcode = NVPTX::StoreParamI16; 2964 break; 2965 case MVT::i32: 2966 Opcode = NVPTX::StoreParamI32; 2967 break; 2968 case MVT::i64: 2969 Opcode = NVPTX::StoreParamI64; 2970 break; 2971 case MVT::f32: 2972 Opcode = NVPTX::StoreParamF32; 2973 break; 2974 case MVT::f64: 2975 Opcode = NVPTX::StoreParamF64; 2976 break; 2977 } 2978 break; 2979 case 2: 2980 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2981 default: 2982 return nullptr; 2983 case MVT::i1: 2984 Opcode = NVPTX::StoreParamV2I8; 2985 break; 2986 case MVT::i8: 2987 Opcode = NVPTX::StoreParamV2I8; 2988 break; 2989 case MVT::i16: 2990 Opcode = NVPTX::StoreParamV2I16; 2991 break; 2992 case MVT::i32: 2993 Opcode = NVPTX::StoreParamV2I32; 2994 break; 2995 case MVT::i64: 2996 Opcode = NVPTX::StoreParamV2I64; 2997 break; 2998 case MVT::f32: 2999 Opcode = NVPTX::StoreParamV2F32; 3000 break; 3001 case MVT::f64: 3002 Opcode = NVPTX::StoreParamV2F64; 3003 break; 3004 } 3005 break; 3006 case 4: 3007 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 3008 default: 3009 return nullptr; 3010 case MVT::i1: 3011 Opcode = NVPTX::StoreParamV4I8; 3012 break; 3013 case MVT::i8: 3014 Opcode = NVPTX::StoreParamV4I8; 3015 break; 3016 case MVT::i16: 3017 Opcode = NVPTX::StoreParamV4I16; 3018 break; 3019 case MVT::i32: 3020 Opcode = NVPTX::StoreParamV4I32; 3021 break; 3022 case MVT::f32: 3023 Opcode = NVPTX::StoreParamV4F32; 3024 break; 3025 } 3026 break; 3027 } 3028 break; 3029 // Special case: if we have a sign-extend/zero-extend node, insert the 3030 // conversion instruction first, and use that as the value operand to 3031 // the selected StoreParam node. 3032 case NVPTXISD::StoreParamU32: { 3033 Opcode = NVPTX::StoreParamI32; 3034 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, 3035 MVT::i32); 3036 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, 3037 MVT::i32, Ops[0], CvtNone); 3038 Ops[0] = SDValue(Cvt, 0); 3039 break; 3040 } 3041 case NVPTXISD::StoreParamS32: { 3042 Opcode = NVPTX::StoreParamI32; 3043 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, 3044 MVT::i32); 3045 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, 3046 MVT::i32, Ops[0], CvtNone); 3047 Ops[0] = SDValue(Cvt, 0); 3048 break; 3049 } 3050 } 3051 3052 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 3053 SDNode *Ret = 3054 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops); 3055 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 3056 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 3057 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); 3058 3059 return Ret; 3060} 3061 3062SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { 3063 SDValue Chain = N->getOperand(0); 3064 SDNode *Ret = nullptr; 3065 unsigned Opc = 0; 3066 SmallVector<SDValue, 8> Ops; 3067 3068 switch (N->getOpcode()) { 3069 default: return nullptr; 3070 case NVPTXISD::Tex1DFloatS32: 3071 Opc = NVPTX::TEX_1D_F32_S32; 3072 break; 3073 case NVPTXISD::Tex1DFloatFloat: 3074 Opc = NVPTX::TEX_1D_F32_F32; 3075 break; 3076 case NVPTXISD::Tex1DFloatFloatLevel: 3077 Opc = NVPTX::TEX_1D_F32_F32_LEVEL; 3078 break; 3079 case NVPTXISD::Tex1DFloatFloatGrad: 3080 Opc = NVPTX::TEX_1D_F32_F32_GRAD; 3081 break; 3082 case NVPTXISD::Tex1DS32S32: 3083 Opc = NVPTX::TEX_1D_S32_S32; 3084 break; 3085 case NVPTXISD::Tex1DS32Float: 3086 Opc = NVPTX::TEX_1D_S32_F32; 3087 break; 3088 case NVPTXISD::Tex1DS32FloatLevel: 3089 Opc = NVPTX::TEX_1D_S32_F32_LEVEL; 3090 break; 3091 case NVPTXISD::Tex1DS32FloatGrad: 3092 Opc = NVPTX::TEX_1D_S32_F32_GRAD; 3093 break; 3094 case NVPTXISD::Tex1DU32S32: 3095 Opc = NVPTX::TEX_1D_U32_S32; 3096 break; 3097 case NVPTXISD::Tex1DU32Float: 3098 Opc = NVPTX::TEX_1D_U32_F32; 3099 break; 3100 case NVPTXISD::Tex1DU32FloatLevel: 3101 Opc = NVPTX::TEX_1D_U32_F32_LEVEL; 3102 break; 3103 case NVPTXISD::Tex1DU32FloatGrad: 3104 Opc = NVPTX::TEX_1D_U32_F32_GRAD; 3105 break; 3106 case NVPTXISD::Tex1DArrayFloatS32: 3107 Opc = NVPTX::TEX_1D_ARRAY_F32_S32; 3108 break; 3109 case NVPTXISD::Tex1DArrayFloatFloat: 3110 Opc = NVPTX::TEX_1D_ARRAY_F32_F32; 3111 break; 3112 case NVPTXISD::Tex1DArrayFloatFloatLevel: 3113 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; 3114 break; 3115 case NVPTXISD::Tex1DArrayFloatFloatGrad: 3116 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; 3117 break; 3118 case NVPTXISD::Tex1DArrayS32S32: 3119 Opc = NVPTX::TEX_1D_ARRAY_S32_S32; 3120 break; 3121 case NVPTXISD::Tex1DArrayS32Float: 3122 Opc = NVPTX::TEX_1D_ARRAY_S32_F32; 3123 break; 3124 case NVPTXISD::Tex1DArrayS32FloatLevel: 3125 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; 3126 break; 3127 case NVPTXISD::Tex1DArrayS32FloatGrad: 3128 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; 3129 break; 3130 case NVPTXISD::Tex1DArrayU32S32: 3131 Opc = NVPTX::TEX_1D_ARRAY_U32_S32; 3132 break; 3133 case NVPTXISD::Tex1DArrayU32Float: 3134 Opc = NVPTX::TEX_1D_ARRAY_U32_F32; 3135 break; 3136 case NVPTXISD::Tex1DArrayU32FloatLevel: 3137 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; 3138 break; 3139 case NVPTXISD::Tex1DArrayU32FloatGrad: 3140 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; 3141 break; 3142 case NVPTXISD::Tex2DFloatS32: 3143 Opc = NVPTX::TEX_2D_F32_S32; 3144 break; 3145 case NVPTXISD::Tex2DFloatFloat: 3146 Opc = NVPTX::TEX_2D_F32_F32; 3147 break; 3148 case NVPTXISD::Tex2DFloatFloatLevel: 3149 Opc = NVPTX::TEX_2D_F32_F32_LEVEL; 3150 break; 3151 case NVPTXISD::Tex2DFloatFloatGrad: 3152 Opc = NVPTX::TEX_2D_F32_F32_GRAD; 3153 break; 3154 case NVPTXISD::Tex2DS32S32: 3155 Opc = NVPTX::TEX_2D_S32_S32; 3156 break; 3157 case NVPTXISD::Tex2DS32Float: 3158 Opc = NVPTX::TEX_2D_S32_F32; 3159 break; 3160 case NVPTXISD::Tex2DS32FloatLevel: 3161 Opc = NVPTX::TEX_2D_S32_F32_LEVEL; 3162 break; 3163 case NVPTXISD::Tex2DS32FloatGrad: 3164 Opc = NVPTX::TEX_2D_S32_F32_GRAD; 3165 break; 3166 case NVPTXISD::Tex2DU32S32: 3167 Opc = NVPTX::TEX_2D_U32_S32; 3168 break; 3169 case NVPTXISD::Tex2DU32Float: 3170 Opc = NVPTX::TEX_2D_U32_F32; 3171 break; 3172 case NVPTXISD::Tex2DU32FloatLevel: 3173 Opc = NVPTX::TEX_2D_U32_F32_LEVEL; 3174 break; 3175 case NVPTXISD::Tex2DU32FloatGrad: 3176 Opc = NVPTX::TEX_2D_U32_F32_GRAD; 3177 break; 3178 case NVPTXISD::Tex2DArrayFloatS32: 3179 Opc = NVPTX::TEX_2D_ARRAY_F32_S32; 3180 break; 3181 case NVPTXISD::Tex2DArrayFloatFloat: 3182 Opc = NVPTX::TEX_2D_ARRAY_F32_F32; 3183 break; 3184 case NVPTXISD::Tex2DArrayFloatFloatLevel: 3185 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; 3186 break; 3187 case NVPTXISD::Tex2DArrayFloatFloatGrad: 3188 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; 3189 break; 3190 case NVPTXISD::Tex2DArrayS32S32: 3191 Opc = NVPTX::TEX_2D_ARRAY_S32_S32; 3192 break; 3193 case NVPTXISD::Tex2DArrayS32Float: 3194 Opc = NVPTX::TEX_2D_ARRAY_S32_F32; 3195 break; 3196 case NVPTXISD::Tex2DArrayS32FloatLevel: 3197 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; 3198 break; 3199 case NVPTXISD::Tex2DArrayS32FloatGrad: 3200 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; 3201 break; 3202 case NVPTXISD::Tex2DArrayU32S32: 3203 Opc = NVPTX::TEX_2D_ARRAY_U32_S32; 3204 break; 3205 case NVPTXISD::Tex2DArrayU32Float: 3206 Opc = NVPTX::TEX_2D_ARRAY_U32_F32; 3207 break; 3208 case NVPTXISD::Tex2DArrayU32FloatLevel: 3209 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; 3210 break; 3211 case NVPTXISD::Tex2DArrayU32FloatGrad: 3212 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; 3213 break; 3214 case NVPTXISD::Tex3DFloatS32: 3215 Opc = NVPTX::TEX_3D_F32_S32; 3216 break; 3217 case NVPTXISD::Tex3DFloatFloat: 3218 Opc = NVPTX::TEX_3D_F32_F32; 3219 break; 3220 case NVPTXISD::Tex3DFloatFloatLevel: 3221 Opc = NVPTX::TEX_3D_F32_F32_LEVEL; 3222 break; 3223 case NVPTXISD::Tex3DFloatFloatGrad: 3224 Opc = NVPTX::TEX_3D_F32_F32_GRAD; 3225 break; 3226 case NVPTXISD::Tex3DS32S32: 3227 Opc = NVPTX::TEX_3D_S32_S32; 3228 break; 3229 case NVPTXISD::Tex3DS32Float: 3230 Opc = NVPTX::TEX_3D_S32_F32; 3231 break; 3232 case NVPTXISD::Tex3DS32FloatLevel: 3233 Opc = NVPTX::TEX_3D_S32_F32_LEVEL; 3234 break; 3235 case NVPTXISD::Tex3DS32FloatGrad: 3236 Opc = NVPTX::TEX_3D_S32_F32_GRAD; 3237 break; 3238 case NVPTXISD::Tex3DU32S32: 3239 Opc = NVPTX::TEX_3D_U32_S32; 3240 break; 3241 case NVPTXISD::Tex3DU32Float: 3242 Opc = NVPTX::TEX_3D_U32_F32; 3243 break; 3244 case NVPTXISD::Tex3DU32FloatLevel: 3245 Opc = NVPTX::TEX_3D_U32_F32_LEVEL; 3246 break; 3247 case NVPTXISD::Tex3DU32FloatGrad: 3248 Opc = NVPTX::TEX_3D_U32_F32_GRAD; 3249 break; 3250 case NVPTXISD::TexCubeFloatFloat: 3251 Opc = NVPTX::TEX_CUBE_F32_F32; 3252 break; 3253 case NVPTXISD::TexCubeFloatFloatLevel: 3254 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; 3255 break; 3256 case NVPTXISD::TexCubeS32Float: 3257 Opc = NVPTX::TEX_CUBE_S32_F32; 3258 break; 3259 case NVPTXISD::TexCubeS32FloatLevel: 3260 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; 3261 break; 3262 case NVPTXISD::TexCubeU32Float: 3263 Opc = NVPTX::TEX_CUBE_U32_F32; 3264 break; 3265 case NVPTXISD::TexCubeU32FloatLevel: 3266 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; 3267 break; 3268 case NVPTXISD::TexCubeArrayFloatFloat: 3269 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; 3270 break; 3271 case NVPTXISD::TexCubeArrayFloatFloatLevel: 3272 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; 3273 break; 3274 case NVPTXISD::TexCubeArrayS32Float: 3275 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; 3276 break; 3277 case NVPTXISD::TexCubeArrayS32FloatLevel: 3278 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; 3279 break; 3280 case NVPTXISD::TexCubeArrayU32Float: 3281 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; 3282 break; 3283 case NVPTXISD::TexCubeArrayU32FloatLevel: 3284 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; 3285 break; 3286 case NVPTXISD::Tld4R2DFloatFloat: 3287 Opc = NVPTX::TLD4_R_2D_F32_F32; 3288 break; 3289 case NVPTXISD::Tld4G2DFloatFloat: 3290 Opc = NVPTX::TLD4_G_2D_F32_F32; 3291 break; 3292 case NVPTXISD::Tld4B2DFloatFloat: 3293 Opc = NVPTX::TLD4_B_2D_F32_F32; 3294 break; 3295 case NVPTXISD::Tld4A2DFloatFloat: 3296 Opc = NVPTX::TLD4_A_2D_F32_F32; 3297 break; 3298 case NVPTXISD::Tld4R2DS64Float: 3299 Opc = NVPTX::TLD4_R_2D_S32_F32; 3300 break; 3301 case NVPTXISD::Tld4G2DS64Float: 3302 Opc = NVPTX::TLD4_G_2D_S32_F32; 3303 break; 3304 case NVPTXISD::Tld4B2DS64Float: 3305 Opc = NVPTX::TLD4_B_2D_S32_F32; 3306 break; 3307 case NVPTXISD::Tld4A2DS64Float: 3308 Opc = NVPTX::TLD4_A_2D_S32_F32; 3309 break; 3310 case NVPTXISD::Tld4R2DU64Float: 3311 Opc = NVPTX::TLD4_R_2D_U32_F32; 3312 break; 3313 case NVPTXISD::Tld4G2DU64Float: 3314 Opc = NVPTX::TLD4_G_2D_U32_F32; 3315 break; 3316 case NVPTXISD::Tld4B2DU64Float: 3317 Opc = NVPTX::TLD4_B_2D_U32_F32; 3318 break; 3319 case NVPTXISD::Tld4A2DU64Float: 3320 Opc = NVPTX::TLD4_A_2D_U32_F32; 3321 break; 3322 case NVPTXISD::TexUnified1DFloatS32: 3323 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; 3324 break; 3325 case NVPTXISD::TexUnified1DFloatFloat: 3326 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; 3327 break; 3328 case NVPTXISD::TexUnified1DFloatFloatLevel: 3329 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; 3330 break; 3331 case NVPTXISD::TexUnified1DFloatFloatGrad: 3332 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; 3333 break; 3334 case NVPTXISD::TexUnified1DS32S32: 3335 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; 3336 break; 3337 case NVPTXISD::TexUnified1DS32Float: 3338 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; 3339 break; 3340 case NVPTXISD::TexUnified1DS32FloatLevel: 3341 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; 3342 break; 3343 case NVPTXISD::TexUnified1DS32FloatGrad: 3344 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; 3345 break; 3346 case NVPTXISD::TexUnified1DU32S32: 3347 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; 3348 break; 3349 case NVPTXISD::TexUnified1DU32Float: 3350 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; 3351 break; 3352 case NVPTXISD::TexUnified1DU32FloatLevel: 3353 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; 3354 break; 3355 case NVPTXISD::TexUnified1DU32FloatGrad: 3356 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; 3357 break; 3358 case NVPTXISD::TexUnified1DArrayFloatS32: 3359 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; 3360 break; 3361 case NVPTXISD::TexUnified1DArrayFloatFloat: 3362 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; 3363 break; 3364 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 3365 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; 3366 break; 3367 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 3368 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; 3369 break; 3370 case NVPTXISD::TexUnified1DArrayS32S32: 3371 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; 3372 break; 3373 case NVPTXISD::TexUnified1DArrayS32Float: 3374 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; 3375 break; 3376 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 3377 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; 3378 break; 3379 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 3380 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; 3381 break; 3382 case NVPTXISD::TexUnified1DArrayU32S32: 3383 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; 3384 break; 3385 case NVPTXISD::TexUnified1DArrayU32Float: 3386 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; 3387 break; 3388 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 3389 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; 3390 break; 3391 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 3392 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; 3393 break; 3394 case NVPTXISD::TexUnified2DFloatS32: 3395 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; 3396 break; 3397 case NVPTXISD::TexUnified2DFloatFloat: 3398 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; 3399 break; 3400 case NVPTXISD::TexUnified2DFloatFloatLevel: 3401 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; 3402 break; 3403 case NVPTXISD::TexUnified2DFloatFloatGrad: 3404 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; 3405 break; 3406 case NVPTXISD::TexUnified2DS32S32: 3407 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; 3408 break; 3409 case NVPTXISD::TexUnified2DS32Float: 3410 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; 3411 break; 3412 case NVPTXISD::TexUnified2DS32FloatLevel: 3413 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; 3414 break; 3415 case NVPTXISD::TexUnified2DS32FloatGrad: 3416 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; 3417 break; 3418 case NVPTXISD::TexUnified2DU32S32: 3419 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; 3420 break; 3421 case NVPTXISD::TexUnified2DU32Float: 3422 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; 3423 break; 3424 case NVPTXISD::TexUnified2DU32FloatLevel: 3425 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; 3426 break; 3427 case NVPTXISD::TexUnified2DU32FloatGrad: 3428 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; 3429 break; 3430 case NVPTXISD::TexUnified2DArrayFloatS32: 3431 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; 3432 break; 3433 case NVPTXISD::TexUnified2DArrayFloatFloat: 3434 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; 3435 break; 3436 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 3437 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; 3438 break; 3439 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 3440 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; 3441 break; 3442 case NVPTXISD::TexUnified2DArrayS32S32: 3443 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; 3444 break; 3445 case NVPTXISD::TexUnified2DArrayS32Float: 3446 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; 3447 break; 3448 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 3449 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; 3450 break; 3451 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 3452 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; 3453 break; 3454 case NVPTXISD::TexUnified2DArrayU32S32: 3455 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; 3456 break; 3457 case NVPTXISD::TexUnified2DArrayU32Float: 3458 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; 3459 break; 3460 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 3461 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; 3462 break; 3463 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 3464 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; 3465 break; 3466 case NVPTXISD::TexUnified3DFloatS32: 3467 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; 3468 break; 3469 case NVPTXISD::TexUnified3DFloatFloat: 3470 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; 3471 break; 3472 case NVPTXISD::TexUnified3DFloatFloatLevel: 3473 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; 3474 break; 3475 case NVPTXISD::TexUnified3DFloatFloatGrad: 3476 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; 3477 break; 3478 case NVPTXISD::TexUnified3DS32S32: 3479 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; 3480 break; 3481 case NVPTXISD::TexUnified3DS32Float: 3482 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; 3483 break; 3484 case NVPTXISD::TexUnified3DS32FloatLevel: 3485 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; 3486 break; 3487 case NVPTXISD::TexUnified3DS32FloatGrad: 3488 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; 3489 break; 3490 case NVPTXISD::TexUnified3DU32S32: 3491 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; 3492 break; 3493 case NVPTXISD::TexUnified3DU32Float: 3494 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; 3495 break; 3496 case NVPTXISD::TexUnified3DU32FloatLevel: 3497 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; 3498 break; 3499 case NVPTXISD::TexUnified3DU32FloatGrad: 3500 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; 3501 break; 3502 case NVPTXISD::TexUnifiedCubeFloatFloat: 3503 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; 3504 break; 3505 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 3506 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; 3507 break; 3508 case NVPTXISD::TexUnifiedCubeS32Float: 3509 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; 3510 break; 3511 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 3512 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; 3513 break; 3514 case NVPTXISD::TexUnifiedCubeU32Float: 3515 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; 3516 break; 3517 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 3518 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; 3519 break; 3520 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 3521 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; 3522 break; 3523 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 3524 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; 3525 break; 3526 case NVPTXISD::TexUnifiedCubeArrayS32Float: 3527 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; 3528 break; 3529 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 3530 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; 3531 break; 3532 case NVPTXISD::TexUnifiedCubeArrayU32Float: 3533 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; 3534 break; 3535 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 3536 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; 3537 break; 3538 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 3539 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; 3540 break; 3541 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 3542 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; 3543 break; 3544 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 3545 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; 3546 break; 3547 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 3548 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; 3549 break; 3550 case NVPTXISD::Tld4UnifiedR2DS64Float: 3551 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; 3552 break; 3553 case NVPTXISD::Tld4UnifiedG2DS64Float: 3554 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; 3555 break; 3556 case NVPTXISD::Tld4UnifiedB2DS64Float: 3557 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; 3558 break; 3559 case NVPTXISD::Tld4UnifiedA2DS64Float: 3560 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; 3561 break; 3562 case NVPTXISD::Tld4UnifiedR2DU64Float: 3563 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; 3564 break; 3565 case NVPTXISD::Tld4UnifiedG2DU64Float: 3566 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; 3567 break; 3568 case NVPTXISD::Tld4UnifiedB2DU64Float: 3569 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; 3570 break; 3571 case NVPTXISD::Tld4UnifiedA2DU64Float: 3572 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; 3573 break; 3574 } 3575 3576 // Copy over operands 3577 for (unsigned i = 1; i < N->getNumOperands(); ++i) { 3578 Ops.push_back(N->getOperand(i)); 3579 } 3580 3581 Ops.push_back(Chain); 3582 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 3583 return Ret; 3584} 3585 3586SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { 3587 SDValue Chain = N->getOperand(0); 3588 SDValue TexHandle = N->getOperand(1); 3589 SDNode *Ret = nullptr; 3590 unsigned Opc = 0; 3591 SmallVector<SDValue, 8> Ops; 3592 switch (N->getOpcode()) { 3593 default: return nullptr; 3594 case NVPTXISD::Suld1DI8Clamp: 3595 Opc = NVPTX::SULD_1D_I8_CLAMP; 3596 Ops.push_back(TexHandle); 3597 Ops.push_back(N->getOperand(2)); 3598 Ops.push_back(Chain); 3599 break; 3600 case NVPTXISD::Suld1DI16Clamp: 3601 Opc = NVPTX::SULD_1D_I16_CLAMP; 3602 Ops.push_back(TexHandle); 3603 Ops.push_back(N->getOperand(2)); 3604 Ops.push_back(Chain); 3605 break; 3606 case NVPTXISD::Suld1DI32Clamp: 3607 Opc = NVPTX::SULD_1D_I32_CLAMP; 3608 Ops.push_back(TexHandle); 3609 Ops.push_back(N->getOperand(2)); 3610 Ops.push_back(Chain); 3611 break; 3612 case NVPTXISD::Suld1DI64Clamp: 3613 Opc = NVPTX::SULD_1D_I64_CLAMP; 3614 Ops.push_back(TexHandle); 3615 Ops.push_back(N->getOperand(2)); 3616 Ops.push_back(Chain); 3617 break; 3618 case NVPTXISD::Suld1DV2I8Clamp: 3619 Opc = NVPTX::SULD_1D_V2I8_CLAMP; 3620 Ops.push_back(TexHandle); 3621 Ops.push_back(N->getOperand(2)); 3622 Ops.push_back(Chain); 3623 break; 3624 case NVPTXISD::Suld1DV2I16Clamp: 3625 Opc = NVPTX::SULD_1D_V2I16_CLAMP; 3626 Ops.push_back(TexHandle); 3627 Ops.push_back(N->getOperand(2)); 3628 Ops.push_back(Chain); 3629 break; 3630 case NVPTXISD::Suld1DV2I32Clamp: 3631 Opc = NVPTX::SULD_1D_V2I32_CLAMP; 3632 Ops.push_back(TexHandle); 3633 Ops.push_back(N->getOperand(2)); 3634 Ops.push_back(Chain); 3635 break; 3636 case NVPTXISD::Suld1DV2I64Clamp: 3637 Opc = NVPTX::SULD_1D_V2I64_CLAMP; 3638 Ops.push_back(TexHandle); 3639 Ops.push_back(N->getOperand(2)); 3640 Ops.push_back(Chain); 3641 break; 3642 case NVPTXISD::Suld1DV4I8Clamp: 3643 Opc = NVPTX::SULD_1D_V4I8_CLAMP; 3644 Ops.push_back(TexHandle); 3645 Ops.push_back(N->getOperand(2)); 3646 Ops.push_back(Chain); 3647 break; 3648 case NVPTXISD::Suld1DV4I16Clamp: 3649 Opc = NVPTX::SULD_1D_V4I16_CLAMP; 3650 Ops.push_back(TexHandle); 3651 Ops.push_back(N->getOperand(2)); 3652 Ops.push_back(Chain); 3653 break; 3654 case NVPTXISD::Suld1DV4I32Clamp: 3655 Opc = NVPTX::SULD_1D_V4I32_CLAMP; 3656 Ops.push_back(TexHandle); 3657 Ops.push_back(N->getOperand(2)); 3658 Ops.push_back(Chain); 3659 break; 3660 case NVPTXISD::Suld1DArrayI8Clamp: 3661 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; 3662 Ops.push_back(TexHandle); 3663 Ops.push_back(N->getOperand(2)); 3664 Ops.push_back(N->getOperand(3)); 3665 Ops.push_back(Chain); 3666 break; 3667 case NVPTXISD::Suld1DArrayI16Clamp: 3668 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; 3669 Ops.push_back(TexHandle); 3670 Ops.push_back(N->getOperand(2)); 3671 Ops.push_back(N->getOperand(3)); 3672 Ops.push_back(Chain); 3673 break; 3674 case NVPTXISD::Suld1DArrayI32Clamp: 3675 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; 3676 Ops.push_back(TexHandle); 3677 Ops.push_back(N->getOperand(2)); 3678 Ops.push_back(N->getOperand(3)); 3679 Ops.push_back(Chain); 3680 break; 3681 case NVPTXISD::Suld1DArrayI64Clamp: 3682 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; 3683 Ops.push_back(TexHandle); 3684 Ops.push_back(N->getOperand(2)); 3685 Ops.push_back(N->getOperand(3)); 3686 Ops.push_back(Chain); 3687 break; 3688 case NVPTXISD::Suld1DArrayV2I8Clamp: 3689 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; 3690 Ops.push_back(TexHandle); 3691 Ops.push_back(N->getOperand(2)); 3692 Ops.push_back(N->getOperand(3)); 3693 Ops.push_back(Chain); 3694 break; 3695 case NVPTXISD::Suld1DArrayV2I16Clamp: 3696 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; 3697 Ops.push_back(TexHandle); 3698 Ops.push_back(N->getOperand(2)); 3699 Ops.push_back(N->getOperand(3)); 3700 Ops.push_back(Chain); 3701 break; 3702 case NVPTXISD::Suld1DArrayV2I32Clamp: 3703 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; 3704 Ops.push_back(TexHandle); 3705 Ops.push_back(N->getOperand(2)); 3706 Ops.push_back(N->getOperand(3)); 3707 Ops.push_back(Chain); 3708 break; 3709 case NVPTXISD::Suld1DArrayV2I64Clamp: 3710 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; 3711 Ops.push_back(TexHandle); 3712 Ops.push_back(N->getOperand(2)); 3713 Ops.push_back(N->getOperand(3)); 3714 Ops.push_back(Chain); 3715 break; 3716 case NVPTXISD::Suld1DArrayV4I8Clamp: 3717 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; 3718 Ops.push_back(TexHandle); 3719 Ops.push_back(N->getOperand(2)); 3720 Ops.push_back(N->getOperand(3)); 3721 Ops.push_back(Chain); 3722 break; 3723 case NVPTXISD::Suld1DArrayV4I16Clamp: 3724 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; 3725 Ops.push_back(TexHandle); 3726 Ops.push_back(N->getOperand(2)); 3727 Ops.push_back(N->getOperand(3)); 3728 Ops.push_back(Chain); 3729 break; 3730 case NVPTXISD::Suld1DArrayV4I32Clamp: 3731 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; 3732 Ops.push_back(TexHandle); 3733 Ops.push_back(N->getOperand(2)); 3734 Ops.push_back(N->getOperand(3)); 3735 Ops.push_back(Chain); 3736 break; 3737 case NVPTXISD::Suld2DI8Clamp: 3738 Opc = NVPTX::SULD_2D_I8_CLAMP; 3739 Ops.push_back(TexHandle); 3740 Ops.push_back(N->getOperand(2)); 3741 Ops.push_back(N->getOperand(3)); 3742 Ops.push_back(Chain); 3743 break; 3744 case NVPTXISD::Suld2DI16Clamp: 3745 Opc = NVPTX::SULD_2D_I16_CLAMP; 3746 Ops.push_back(TexHandle); 3747 Ops.push_back(N->getOperand(2)); 3748 Ops.push_back(N->getOperand(3)); 3749 Ops.push_back(Chain); 3750 break; 3751 case NVPTXISD::Suld2DI32Clamp: 3752 Opc = NVPTX::SULD_2D_I32_CLAMP; 3753 Ops.push_back(TexHandle); 3754 Ops.push_back(N->getOperand(2)); 3755 Ops.push_back(N->getOperand(3)); 3756 Ops.push_back(Chain); 3757 break; 3758 case NVPTXISD::Suld2DI64Clamp: 3759 Opc = NVPTX::SULD_2D_I64_CLAMP; 3760 Ops.push_back(TexHandle); 3761 Ops.push_back(N->getOperand(2)); 3762 Ops.push_back(N->getOperand(3)); 3763 Ops.push_back(Chain); 3764 break; 3765 case NVPTXISD::Suld2DV2I8Clamp: 3766 Opc = NVPTX::SULD_2D_V2I8_CLAMP; 3767 Ops.push_back(TexHandle); 3768 Ops.push_back(N->getOperand(2)); 3769 Ops.push_back(N->getOperand(3)); 3770 Ops.push_back(Chain); 3771 break; 3772 case NVPTXISD::Suld2DV2I16Clamp: 3773 Opc = NVPTX::SULD_2D_V2I16_CLAMP; 3774 Ops.push_back(TexHandle); 3775 Ops.push_back(N->getOperand(2)); 3776 Ops.push_back(N->getOperand(3)); 3777 Ops.push_back(Chain); 3778 break; 3779 case NVPTXISD::Suld2DV2I32Clamp: 3780 Opc = NVPTX::SULD_2D_V2I32_CLAMP; 3781 Ops.push_back(TexHandle); 3782 Ops.push_back(N->getOperand(2)); 3783 Ops.push_back(N->getOperand(3)); 3784 Ops.push_back(Chain); 3785 break; 3786 case NVPTXISD::Suld2DV2I64Clamp: 3787 Opc = NVPTX::SULD_2D_V2I64_CLAMP; 3788 Ops.push_back(TexHandle); 3789 Ops.push_back(N->getOperand(2)); 3790 Ops.push_back(N->getOperand(3)); 3791 Ops.push_back(Chain); 3792 break; 3793 case NVPTXISD::Suld2DV4I8Clamp: 3794 Opc = NVPTX::SULD_2D_V4I8_CLAMP; 3795 Ops.push_back(TexHandle); 3796 Ops.push_back(N->getOperand(2)); 3797 Ops.push_back(N->getOperand(3)); 3798 Ops.push_back(Chain); 3799 break; 3800 case NVPTXISD::Suld2DV4I16Clamp: 3801 Opc = NVPTX::SULD_2D_V4I16_CLAMP; 3802 Ops.push_back(TexHandle); 3803 Ops.push_back(N->getOperand(2)); 3804 Ops.push_back(N->getOperand(3)); 3805 Ops.push_back(Chain); 3806 break; 3807 case NVPTXISD::Suld2DV4I32Clamp: 3808 Opc = NVPTX::SULD_2D_V4I32_CLAMP; 3809 Ops.push_back(TexHandle); 3810 Ops.push_back(N->getOperand(2)); 3811 Ops.push_back(N->getOperand(3)); 3812 Ops.push_back(Chain); 3813 break; 3814 case NVPTXISD::Suld2DArrayI8Clamp: 3815 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; 3816 Ops.push_back(TexHandle); 3817 Ops.push_back(N->getOperand(2)); 3818 Ops.push_back(N->getOperand(3)); 3819 Ops.push_back(N->getOperand(4)); 3820 Ops.push_back(Chain); 3821 break; 3822 case NVPTXISD::Suld2DArrayI16Clamp: 3823 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; 3824 Ops.push_back(TexHandle); 3825 Ops.push_back(N->getOperand(2)); 3826 Ops.push_back(N->getOperand(3)); 3827 Ops.push_back(N->getOperand(4)); 3828 Ops.push_back(Chain); 3829 break; 3830 case NVPTXISD::Suld2DArrayI32Clamp: 3831 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; 3832 Ops.push_back(TexHandle); 3833 Ops.push_back(N->getOperand(2)); 3834 Ops.push_back(N->getOperand(3)); 3835 Ops.push_back(N->getOperand(4)); 3836 Ops.push_back(Chain); 3837 break; 3838 case NVPTXISD::Suld2DArrayI64Clamp: 3839 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; 3840 Ops.push_back(TexHandle); 3841 Ops.push_back(N->getOperand(2)); 3842 Ops.push_back(N->getOperand(3)); 3843 Ops.push_back(N->getOperand(4)); 3844 Ops.push_back(Chain); 3845 break; 3846 case NVPTXISD::Suld2DArrayV2I8Clamp: 3847 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; 3848 Ops.push_back(TexHandle); 3849 Ops.push_back(N->getOperand(2)); 3850 Ops.push_back(N->getOperand(3)); 3851 Ops.push_back(N->getOperand(4)); 3852 Ops.push_back(Chain); 3853 break; 3854 case NVPTXISD::Suld2DArrayV2I16Clamp: 3855 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; 3856 Ops.push_back(TexHandle); 3857 Ops.push_back(N->getOperand(2)); 3858 Ops.push_back(N->getOperand(3)); 3859 Ops.push_back(N->getOperand(4)); 3860 Ops.push_back(Chain); 3861 break; 3862 case NVPTXISD::Suld2DArrayV2I32Clamp: 3863 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; 3864 Ops.push_back(TexHandle); 3865 Ops.push_back(N->getOperand(2)); 3866 Ops.push_back(N->getOperand(3)); 3867 Ops.push_back(N->getOperand(4)); 3868 Ops.push_back(Chain); 3869 break; 3870 case NVPTXISD::Suld2DArrayV2I64Clamp: 3871 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; 3872 Ops.push_back(TexHandle); 3873 Ops.push_back(N->getOperand(2)); 3874 Ops.push_back(N->getOperand(3)); 3875 Ops.push_back(N->getOperand(4)); 3876 Ops.push_back(Chain); 3877 break; 3878 case NVPTXISD::Suld2DArrayV4I8Clamp: 3879 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; 3880 Ops.push_back(TexHandle); 3881 Ops.push_back(N->getOperand(2)); 3882 Ops.push_back(N->getOperand(3)); 3883 Ops.push_back(N->getOperand(4)); 3884 Ops.push_back(Chain); 3885 break; 3886 case NVPTXISD::Suld2DArrayV4I16Clamp: 3887 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; 3888 Ops.push_back(TexHandle); 3889 Ops.push_back(N->getOperand(2)); 3890 Ops.push_back(N->getOperand(3)); 3891 Ops.push_back(N->getOperand(4)); 3892 Ops.push_back(Chain); 3893 break; 3894 case NVPTXISD::Suld2DArrayV4I32Clamp: 3895 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; 3896 Ops.push_back(TexHandle); 3897 Ops.push_back(N->getOperand(2)); 3898 Ops.push_back(N->getOperand(3)); 3899 Ops.push_back(N->getOperand(4)); 3900 Ops.push_back(Chain); 3901 break; 3902 case NVPTXISD::Suld3DI8Clamp: 3903 Opc = NVPTX::SULD_3D_I8_CLAMP; 3904 Ops.push_back(TexHandle); 3905 Ops.push_back(N->getOperand(2)); 3906 Ops.push_back(N->getOperand(3)); 3907 Ops.push_back(N->getOperand(4)); 3908 Ops.push_back(Chain); 3909 break; 3910 case NVPTXISD::Suld3DI16Clamp: 3911 Opc = NVPTX::SULD_3D_I16_CLAMP; 3912 Ops.push_back(TexHandle); 3913 Ops.push_back(N->getOperand(2)); 3914 Ops.push_back(N->getOperand(3)); 3915 Ops.push_back(N->getOperand(4)); 3916 Ops.push_back(Chain); 3917 break; 3918 case NVPTXISD::Suld3DI32Clamp: 3919 Opc = NVPTX::SULD_3D_I32_CLAMP; 3920 Ops.push_back(TexHandle); 3921 Ops.push_back(N->getOperand(2)); 3922 Ops.push_back(N->getOperand(3)); 3923 Ops.push_back(N->getOperand(4)); 3924 Ops.push_back(Chain); 3925 break; 3926 case NVPTXISD::Suld3DI64Clamp: 3927 Opc = NVPTX::SULD_3D_I64_CLAMP; 3928 Ops.push_back(TexHandle); 3929 Ops.push_back(N->getOperand(2)); 3930 Ops.push_back(N->getOperand(3)); 3931 Ops.push_back(N->getOperand(4)); 3932 Ops.push_back(Chain); 3933 break; 3934 case NVPTXISD::Suld3DV2I8Clamp: 3935 Opc = NVPTX::SULD_3D_V2I8_CLAMP; 3936 Ops.push_back(TexHandle); 3937 Ops.push_back(N->getOperand(2)); 3938 Ops.push_back(N->getOperand(3)); 3939 Ops.push_back(N->getOperand(4)); 3940 Ops.push_back(Chain); 3941 break; 3942 case NVPTXISD::Suld3DV2I16Clamp: 3943 Opc = NVPTX::SULD_3D_V2I16_CLAMP; 3944 Ops.push_back(TexHandle); 3945 Ops.push_back(N->getOperand(2)); 3946 Ops.push_back(N->getOperand(3)); 3947 Ops.push_back(N->getOperand(4)); 3948 Ops.push_back(Chain); 3949 break; 3950 case NVPTXISD::Suld3DV2I32Clamp: 3951 Opc = NVPTX::SULD_3D_V2I32_CLAMP; 3952 Ops.push_back(TexHandle); 3953 Ops.push_back(N->getOperand(2)); 3954 Ops.push_back(N->getOperand(3)); 3955 Ops.push_back(N->getOperand(4)); 3956 Ops.push_back(Chain); 3957 break; 3958 case NVPTXISD::Suld3DV2I64Clamp: 3959 Opc = NVPTX::SULD_3D_V2I64_CLAMP; 3960 Ops.push_back(TexHandle); 3961 Ops.push_back(N->getOperand(2)); 3962 Ops.push_back(N->getOperand(3)); 3963 Ops.push_back(N->getOperand(4)); 3964 Ops.push_back(Chain); 3965 break; 3966 case NVPTXISD::Suld3DV4I8Clamp: 3967 Opc = NVPTX::SULD_3D_V4I8_CLAMP; 3968 Ops.push_back(TexHandle); 3969 Ops.push_back(N->getOperand(2)); 3970 Ops.push_back(N->getOperand(3)); 3971 Ops.push_back(N->getOperand(4)); 3972 Ops.push_back(Chain); 3973 break; 3974 case NVPTXISD::Suld3DV4I16Clamp: 3975 Opc = NVPTX::SULD_3D_V4I16_CLAMP; 3976 Ops.push_back(TexHandle); 3977 Ops.push_back(N->getOperand(2)); 3978 Ops.push_back(N->getOperand(3)); 3979 Ops.push_back(N->getOperand(4)); 3980 Ops.push_back(Chain); 3981 break; 3982 case NVPTXISD::Suld3DV4I32Clamp: 3983 Opc = NVPTX::SULD_3D_V4I32_CLAMP; 3984 Ops.push_back(TexHandle); 3985 Ops.push_back(N->getOperand(2)); 3986 Ops.push_back(N->getOperand(3)); 3987 Ops.push_back(N->getOperand(4)); 3988 Ops.push_back(Chain); 3989 break; 3990 case NVPTXISD::Suld1DI8Trap: 3991 Opc = NVPTX::SULD_1D_I8_TRAP; 3992 Ops.push_back(TexHandle); 3993 Ops.push_back(N->getOperand(2)); 3994 Ops.push_back(Chain); 3995 break; 3996 case NVPTXISD::Suld1DI16Trap: 3997 Opc = NVPTX::SULD_1D_I16_TRAP; 3998 Ops.push_back(TexHandle); 3999 Ops.push_back(N->getOperand(2)); 4000 Ops.push_back(Chain); 4001 break; 4002 case NVPTXISD::Suld1DI32Trap: 4003 Opc = NVPTX::SULD_1D_I32_TRAP; 4004 Ops.push_back(TexHandle); 4005 Ops.push_back(N->getOperand(2)); 4006 Ops.push_back(Chain); 4007 break; 4008 case NVPTXISD::Suld1DI64Trap: 4009 Opc = NVPTX::SULD_1D_I64_TRAP; 4010 Ops.push_back(TexHandle); 4011 Ops.push_back(N->getOperand(2)); 4012 Ops.push_back(Chain); 4013 break; 4014 case NVPTXISD::Suld1DV2I8Trap: 4015 Opc = NVPTX::SULD_1D_V2I8_TRAP; 4016 Ops.push_back(TexHandle); 4017 Ops.push_back(N->getOperand(2)); 4018 Ops.push_back(Chain); 4019 break; 4020 case NVPTXISD::Suld1DV2I16Trap: 4021 Opc = NVPTX::SULD_1D_V2I16_TRAP; 4022 Ops.push_back(TexHandle); 4023 Ops.push_back(N->getOperand(2)); 4024 Ops.push_back(Chain); 4025 break; 4026 case NVPTXISD::Suld1DV2I32Trap: 4027 Opc = NVPTX::SULD_1D_V2I32_TRAP; 4028 Ops.push_back(TexHandle); 4029 Ops.push_back(N->getOperand(2)); 4030 Ops.push_back(Chain); 4031 break; 4032 case NVPTXISD::Suld1DV2I64Trap: 4033 Opc = NVPTX::SULD_1D_V2I64_TRAP; 4034 Ops.push_back(TexHandle); 4035 Ops.push_back(N->getOperand(2)); 4036 Ops.push_back(Chain); 4037 break; 4038 case NVPTXISD::Suld1DV4I8Trap: 4039 Opc = NVPTX::SULD_1D_V4I8_TRAP; 4040 Ops.push_back(TexHandle); 4041 Ops.push_back(N->getOperand(2)); 4042 Ops.push_back(Chain); 4043 break; 4044 case NVPTXISD::Suld1DV4I16Trap: 4045 Opc = NVPTX::SULD_1D_V4I16_TRAP; 4046 Ops.push_back(TexHandle); 4047 Ops.push_back(N->getOperand(2)); 4048 Ops.push_back(Chain); 4049 break; 4050 case NVPTXISD::Suld1DV4I32Trap: 4051 Opc = NVPTX::SULD_1D_V4I32_TRAP; 4052 Ops.push_back(TexHandle); 4053 Ops.push_back(N->getOperand(2)); 4054 Ops.push_back(Chain); 4055 break; 4056 case NVPTXISD::Suld1DArrayI8Trap: 4057 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; 4058 Ops.push_back(TexHandle); 4059 Ops.push_back(N->getOperand(2)); 4060 Ops.push_back(N->getOperand(3)); 4061 Ops.push_back(Chain); 4062 break; 4063 case NVPTXISD::Suld1DArrayI16Trap: 4064 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; 4065 Ops.push_back(TexHandle); 4066 Ops.push_back(N->getOperand(2)); 4067 Ops.push_back(N->getOperand(3)); 4068 Ops.push_back(Chain); 4069 break; 4070 case NVPTXISD::Suld1DArrayI32Trap: 4071 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; 4072 Ops.push_back(TexHandle); 4073 Ops.push_back(N->getOperand(2)); 4074 Ops.push_back(N->getOperand(3)); 4075 Ops.push_back(Chain); 4076 break; 4077 case NVPTXISD::Suld1DArrayI64Trap: 4078 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; 4079 Ops.push_back(TexHandle); 4080 Ops.push_back(N->getOperand(2)); 4081 Ops.push_back(N->getOperand(3)); 4082 Ops.push_back(Chain); 4083 break; 4084 case NVPTXISD::Suld1DArrayV2I8Trap: 4085 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; 4086 Ops.push_back(TexHandle); 4087 Ops.push_back(N->getOperand(2)); 4088 Ops.push_back(N->getOperand(3)); 4089 Ops.push_back(Chain); 4090 break; 4091 case NVPTXISD::Suld1DArrayV2I16Trap: 4092 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; 4093 Ops.push_back(TexHandle); 4094 Ops.push_back(N->getOperand(2)); 4095 Ops.push_back(N->getOperand(3)); 4096 Ops.push_back(Chain); 4097 break; 4098 case NVPTXISD::Suld1DArrayV2I32Trap: 4099 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; 4100 Ops.push_back(TexHandle); 4101 Ops.push_back(N->getOperand(2)); 4102 Ops.push_back(N->getOperand(3)); 4103 Ops.push_back(Chain); 4104 break; 4105 case NVPTXISD::Suld1DArrayV2I64Trap: 4106 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; 4107 Ops.push_back(TexHandle); 4108 Ops.push_back(N->getOperand(2)); 4109 Ops.push_back(N->getOperand(3)); 4110 Ops.push_back(Chain); 4111 break; 4112 case NVPTXISD::Suld1DArrayV4I8Trap: 4113 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; 4114 Ops.push_back(TexHandle); 4115 Ops.push_back(N->getOperand(2)); 4116 Ops.push_back(N->getOperand(3)); 4117 Ops.push_back(Chain); 4118 break; 4119 case NVPTXISD::Suld1DArrayV4I16Trap: 4120 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; 4121 Ops.push_back(TexHandle); 4122 Ops.push_back(N->getOperand(2)); 4123 Ops.push_back(N->getOperand(3)); 4124 Ops.push_back(Chain); 4125 break; 4126 case NVPTXISD::Suld1DArrayV4I32Trap: 4127 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; 4128 Ops.push_back(TexHandle); 4129 Ops.push_back(N->getOperand(2)); 4130 Ops.push_back(N->getOperand(3)); 4131 Ops.push_back(Chain); 4132 break; 4133 case NVPTXISD::Suld2DI8Trap: 4134 Opc = NVPTX::SULD_2D_I8_TRAP; 4135 Ops.push_back(TexHandle); 4136 Ops.push_back(N->getOperand(2)); 4137 Ops.push_back(N->getOperand(3)); 4138 Ops.push_back(Chain); 4139 break; 4140 case NVPTXISD::Suld2DI16Trap: 4141 Opc = NVPTX::SULD_2D_I16_TRAP; 4142 Ops.push_back(TexHandle); 4143 Ops.push_back(N->getOperand(2)); 4144 Ops.push_back(N->getOperand(3)); 4145 Ops.push_back(Chain); 4146 break; 4147 case NVPTXISD::Suld2DI32Trap: 4148 Opc = NVPTX::SULD_2D_I32_TRAP; 4149 Ops.push_back(TexHandle); 4150 Ops.push_back(N->getOperand(2)); 4151 Ops.push_back(N->getOperand(3)); 4152 Ops.push_back(Chain); 4153 break; 4154 case NVPTXISD::Suld2DI64Trap: 4155 Opc = NVPTX::SULD_2D_I64_TRAP; 4156 Ops.push_back(TexHandle); 4157 Ops.push_back(N->getOperand(2)); 4158 Ops.push_back(N->getOperand(3)); 4159 Ops.push_back(Chain); 4160 break; 4161 case NVPTXISD::Suld2DV2I8Trap: 4162 Opc = NVPTX::SULD_2D_V2I8_TRAP; 4163 Ops.push_back(TexHandle); 4164 Ops.push_back(N->getOperand(2)); 4165 Ops.push_back(N->getOperand(3)); 4166 Ops.push_back(Chain); 4167 break; 4168 case NVPTXISD::Suld2DV2I16Trap: 4169 Opc = NVPTX::SULD_2D_V2I16_TRAP; 4170 Ops.push_back(TexHandle); 4171 Ops.push_back(N->getOperand(2)); 4172 Ops.push_back(N->getOperand(3)); 4173 Ops.push_back(Chain); 4174 break; 4175 case NVPTXISD::Suld2DV2I32Trap: 4176 Opc = NVPTX::SULD_2D_V2I32_TRAP; 4177 Ops.push_back(TexHandle); 4178 Ops.push_back(N->getOperand(2)); 4179 Ops.push_back(N->getOperand(3)); 4180 Ops.push_back(Chain); 4181 break; 4182 case NVPTXISD::Suld2DV2I64Trap: 4183 Opc = NVPTX::SULD_2D_V2I64_TRAP; 4184 Ops.push_back(TexHandle); 4185 Ops.push_back(N->getOperand(2)); 4186 Ops.push_back(N->getOperand(3)); 4187 Ops.push_back(Chain); 4188 break; 4189 case NVPTXISD::Suld2DV4I8Trap: 4190 Opc = NVPTX::SULD_2D_V4I8_TRAP; 4191 Ops.push_back(TexHandle); 4192 Ops.push_back(N->getOperand(2)); 4193 Ops.push_back(N->getOperand(3)); 4194 Ops.push_back(Chain); 4195 break; 4196 case NVPTXISD::Suld2DV4I16Trap: 4197 Opc = NVPTX::SULD_2D_V4I16_TRAP; 4198 Ops.push_back(TexHandle); 4199 Ops.push_back(N->getOperand(2)); 4200 Ops.push_back(N->getOperand(3)); 4201 Ops.push_back(Chain); 4202 break; 4203 case NVPTXISD::Suld2DV4I32Trap: 4204 Opc = NVPTX::SULD_2D_V4I32_TRAP; 4205 Ops.push_back(TexHandle); 4206 Ops.push_back(N->getOperand(2)); 4207 Ops.push_back(N->getOperand(3)); 4208 Ops.push_back(Chain); 4209 break; 4210 case NVPTXISD::Suld2DArrayI8Trap: 4211 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; 4212 Ops.push_back(TexHandle); 4213 Ops.push_back(N->getOperand(2)); 4214 Ops.push_back(N->getOperand(3)); 4215 Ops.push_back(N->getOperand(4)); 4216 Ops.push_back(Chain); 4217 break; 4218 case NVPTXISD::Suld2DArrayI16Trap: 4219 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; 4220 Ops.push_back(TexHandle); 4221 Ops.push_back(N->getOperand(2)); 4222 Ops.push_back(N->getOperand(3)); 4223 Ops.push_back(N->getOperand(4)); 4224 Ops.push_back(Chain); 4225 break; 4226 case NVPTXISD::Suld2DArrayI32Trap: 4227 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; 4228 Ops.push_back(TexHandle); 4229 Ops.push_back(N->getOperand(2)); 4230 Ops.push_back(N->getOperand(3)); 4231 Ops.push_back(N->getOperand(4)); 4232 Ops.push_back(Chain); 4233 break; 4234 case NVPTXISD::Suld2DArrayI64Trap: 4235 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; 4236 Ops.push_back(TexHandle); 4237 Ops.push_back(N->getOperand(2)); 4238 Ops.push_back(N->getOperand(3)); 4239 Ops.push_back(N->getOperand(4)); 4240 Ops.push_back(Chain); 4241 break; 4242 case NVPTXISD::Suld2DArrayV2I8Trap: 4243 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; 4244 Ops.push_back(TexHandle); 4245 Ops.push_back(N->getOperand(2)); 4246 Ops.push_back(N->getOperand(3)); 4247 Ops.push_back(N->getOperand(4)); 4248 Ops.push_back(Chain); 4249 break; 4250 case NVPTXISD::Suld2DArrayV2I16Trap: 4251 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; 4252 Ops.push_back(TexHandle); 4253 Ops.push_back(N->getOperand(2)); 4254 Ops.push_back(N->getOperand(3)); 4255 Ops.push_back(N->getOperand(4)); 4256 Ops.push_back(Chain); 4257 break; 4258 case NVPTXISD::Suld2DArrayV2I32Trap: 4259 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; 4260 Ops.push_back(TexHandle); 4261 Ops.push_back(N->getOperand(2)); 4262 Ops.push_back(N->getOperand(3)); 4263 Ops.push_back(N->getOperand(4)); 4264 Ops.push_back(Chain); 4265 break; 4266 case NVPTXISD::Suld2DArrayV2I64Trap: 4267 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; 4268 Ops.push_back(TexHandle); 4269 Ops.push_back(N->getOperand(2)); 4270 Ops.push_back(N->getOperand(3)); 4271 Ops.push_back(N->getOperand(4)); 4272 Ops.push_back(Chain); 4273 break; 4274 case NVPTXISD::Suld2DArrayV4I8Trap: 4275 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; 4276 Ops.push_back(TexHandle); 4277 Ops.push_back(N->getOperand(2)); 4278 Ops.push_back(N->getOperand(3)); 4279 Ops.push_back(N->getOperand(4)); 4280 Ops.push_back(Chain); 4281 break; 4282 case NVPTXISD::Suld2DArrayV4I16Trap: 4283 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; 4284 Ops.push_back(TexHandle); 4285 Ops.push_back(N->getOperand(2)); 4286 Ops.push_back(N->getOperand(3)); 4287 Ops.push_back(N->getOperand(4)); 4288 Ops.push_back(Chain); 4289 break; 4290 case NVPTXISD::Suld2DArrayV4I32Trap: 4291 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; 4292 Ops.push_back(TexHandle); 4293 Ops.push_back(N->getOperand(2)); 4294 Ops.push_back(N->getOperand(3)); 4295 Ops.push_back(N->getOperand(4)); 4296 Ops.push_back(Chain); 4297 break; 4298 case NVPTXISD::Suld3DI8Trap: 4299 Opc = NVPTX::SULD_3D_I8_TRAP; 4300 Ops.push_back(TexHandle); 4301 Ops.push_back(N->getOperand(2)); 4302 Ops.push_back(N->getOperand(3)); 4303 Ops.push_back(N->getOperand(4)); 4304 Ops.push_back(Chain); 4305 break; 4306 case NVPTXISD::Suld3DI16Trap: 4307 Opc = NVPTX::SULD_3D_I16_TRAP; 4308 Ops.push_back(TexHandle); 4309 Ops.push_back(N->getOperand(2)); 4310 Ops.push_back(N->getOperand(3)); 4311 Ops.push_back(N->getOperand(4)); 4312 Ops.push_back(Chain); 4313 break; 4314 case NVPTXISD::Suld3DI32Trap: 4315 Opc = NVPTX::SULD_3D_I32_TRAP; 4316 Ops.push_back(TexHandle); 4317 Ops.push_back(N->getOperand(2)); 4318 Ops.push_back(N->getOperand(3)); 4319 Ops.push_back(N->getOperand(4)); 4320 Ops.push_back(Chain); 4321 break; 4322 case NVPTXISD::Suld3DI64Trap: 4323 Opc = NVPTX::SULD_3D_I64_TRAP; 4324 Ops.push_back(TexHandle); 4325 Ops.push_back(N->getOperand(2)); 4326 Ops.push_back(N->getOperand(3)); 4327 Ops.push_back(N->getOperand(4)); 4328 Ops.push_back(Chain); 4329 break; 4330 case NVPTXISD::Suld3DV2I8Trap: 4331 Opc = NVPTX::SULD_3D_V2I8_TRAP; 4332 Ops.push_back(TexHandle); 4333 Ops.push_back(N->getOperand(2)); 4334 Ops.push_back(N->getOperand(3)); 4335 Ops.push_back(N->getOperand(4)); 4336 Ops.push_back(Chain); 4337 break; 4338 case NVPTXISD::Suld3DV2I16Trap: 4339 Opc = NVPTX::SULD_3D_V2I16_TRAP; 4340 Ops.push_back(TexHandle); 4341 Ops.push_back(N->getOperand(2)); 4342 Ops.push_back(N->getOperand(3)); 4343 Ops.push_back(N->getOperand(4)); 4344 Ops.push_back(Chain); 4345 break; 4346 case NVPTXISD::Suld3DV2I32Trap: 4347 Opc = NVPTX::SULD_3D_V2I32_TRAP; 4348 Ops.push_back(TexHandle); 4349 Ops.push_back(N->getOperand(2)); 4350 Ops.push_back(N->getOperand(3)); 4351 Ops.push_back(N->getOperand(4)); 4352 Ops.push_back(Chain); 4353 break; 4354 case NVPTXISD::Suld3DV2I64Trap: 4355 Opc = NVPTX::SULD_3D_V2I64_TRAP; 4356 Ops.push_back(TexHandle); 4357 Ops.push_back(N->getOperand(2)); 4358 Ops.push_back(N->getOperand(3)); 4359 Ops.push_back(N->getOperand(4)); 4360 Ops.push_back(Chain); 4361 break; 4362 case NVPTXISD::Suld3DV4I8Trap: 4363 Opc = NVPTX::SULD_3D_V4I8_TRAP; 4364 Ops.push_back(TexHandle); 4365 Ops.push_back(N->getOperand(2)); 4366 Ops.push_back(N->getOperand(3)); 4367 Ops.push_back(N->getOperand(4)); 4368 Ops.push_back(Chain); 4369 break; 4370 case NVPTXISD::Suld3DV4I16Trap: 4371 Opc = NVPTX::SULD_3D_V4I16_TRAP; 4372 Ops.push_back(TexHandle); 4373 Ops.push_back(N->getOperand(2)); 4374 Ops.push_back(N->getOperand(3)); 4375 Ops.push_back(N->getOperand(4)); 4376 Ops.push_back(Chain); 4377 break; 4378 case NVPTXISD::Suld3DV4I32Trap: 4379 Opc = NVPTX::SULD_3D_V4I32_TRAP; 4380 Ops.push_back(TexHandle); 4381 Ops.push_back(N->getOperand(2)); 4382 Ops.push_back(N->getOperand(3)); 4383 Ops.push_back(N->getOperand(4)); 4384 Ops.push_back(Chain); 4385 break; 4386 case NVPTXISD::Suld1DI8Zero: 4387 Opc = NVPTX::SULD_1D_I8_ZERO; 4388 Ops.push_back(TexHandle); 4389 Ops.push_back(N->getOperand(2)); 4390 Ops.push_back(Chain); 4391 break; 4392 case NVPTXISD::Suld1DI16Zero: 4393 Opc = NVPTX::SULD_1D_I16_ZERO; 4394 Ops.push_back(TexHandle); 4395 Ops.push_back(N->getOperand(2)); 4396 Ops.push_back(Chain); 4397 break; 4398 case NVPTXISD::Suld1DI32Zero: 4399 Opc = NVPTX::SULD_1D_I32_ZERO; 4400 Ops.push_back(TexHandle); 4401 Ops.push_back(N->getOperand(2)); 4402 Ops.push_back(Chain); 4403 break; 4404 case NVPTXISD::Suld1DI64Zero: 4405 Opc = NVPTX::SULD_1D_I64_ZERO; 4406 Ops.push_back(TexHandle); 4407 Ops.push_back(N->getOperand(2)); 4408 Ops.push_back(Chain); 4409 break; 4410 case NVPTXISD::Suld1DV2I8Zero: 4411 Opc = NVPTX::SULD_1D_V2I8_ZERO; 4412 Ops.push_back(TexHandle); 4413 Ops.push_back(N->getOperand(2)); 4414 Ops.push_back(Chain); 4415 break; 4416 case NVPTXISD::Suld1DV2I16Zero: 4417 Opc = NVPTX::SULD_1D_V2I16_ZERO; 4418 Ops.push_back(TexHandle); 4419 Ops.push_back(N->getOperand(2)); 4420 Ops.push_back(Chain); 4421 break; 4422 case NVPTXISD::Suld1DV2I32Zero: 4423 Opc = NVPTX::SULD_1D_V2I32_ZERO; 4424 Ops.push_back(TexHandle); 4425 Ops.push_back(N->getOperand(2)); 4426 Ops.push_back(Chain); 4427 break; 4428 case NVPTXISD::Suld1DV2I64Zero: 4429 Opc = NVPTX::SULD_1D_V2I64_ZERO; 4430 Ops.push_back(TexHandle); 4431 Ops.push_back(N->getOperand(2)); 4432 Ops.push_back(Chain); 4433 break; 4434 case NVPTXISD::Suld1DV4I8Zero: 4435 Opc = NVPTX::SULD_1D_V4I8_ZERO; 4436 Ops.push_back(TexHandle); 4437 Ops.push_back(N->getOperand(2)); 4438 Ops.push_back(Chain); 4439 break; 4440 case NVPTXISD::Suld1DV4I16Zero: 4441 Opc = NVPTX::SULD_1D_V4I16_ZERO; 4442 Ops.push_back(TexHandle); 4443 Ops.push_back(N->getOperand(2)); 4444 Ops.push_back(Chain); 4445 break; 4446 case NVPTXISD::Suld1DV4I32Zero: 4447 Opc = NVPTX::SULD_1D_V4I32_ZERO; 4448 Ops.push_back(TexHandle); 4449 Ops.push_back(N->getOperand(2)); 4450 Ops.push_back(Chain); 4451 break; 4452 case NVPTXISD::Suld1DArrayI8Zero: 4453 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; 4454 Ops.push_back(TexHandle); 4455 Ops.push_back(N->getOperand(2)); 4456 Ops.push_back(N->getOperand(3)); 4457 Ops.push_back(Chain); 4458 break; 4459 case NVPTXISD::Suld1DArrayI16Zero: 4460 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; 4461 Ops.push_back(TexHandle); 4462 Ops.push_back(N->getOperand(2)); 4463 Ops.push_back(N->getOperand(3)); 4464 Ops.push_back(Chain); 4465 break; 4466 case NVPTXISD::Suld1DArrayI32Zero: 4467 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; 4468 Ops.push_back(TexHandle); 4469 Ops.push_back(N->getOperand(2)); 4470 Ops.push_back(N->getOperand(3)); 4471 Ops.push_back(Chain); 4472 break; 4473 case NVPTXISD::Suld1DArrayI64Zero: 4474 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; 4475 Ops.push_back(TexHandle); 4476 Ops.push_back(N->getOperand(2)); 4477 Ops.push_back(N->getOperand(3)); 4478 Ops.push_back(Chain); 4479 break; 4480 case NVPTXISD::Suld1DArrayV2I8Zero: 4481 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; 4482 Ops.push_back(TexHandle); 4483 Ops.push_back(N->getOperand(2)); 4484 Ops.push_back(N->getOperand(3)); 4485 Ops.push_back(Chain); 4486 break; 4487 case NVPTXISD::Suld1DArrayV2I16Zero: 4488 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; 4489 Ops.push_back(TexHandle); 4490 Ops.push_back(N->getOperand(2)); 4491 Ops.push_back(N->getOperand(3)); 4492 Ops.push_back(Chain); 4493 break; 4494 case NVPTXISD::Suld1DArrayV2I32Zero: 4495 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; 4496 Ops.push_back(TexHandle); 4497 Ops.push_back(N->getOperand(2)); 4498 Ops.push_back(N->getOperand(3)); 4499 Ops.push_back(Chain); 4500 break; 4501 case NVPTXISD::Suld1DArrayV2I64Zero: 4502 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; 4503 Ops.push_back(TexHandle); 4504 Ops.push_back(N->getOperand(2)); 4505 Ops.push_back(N->getOperand(3)); 4506 Ops.push_back(Chain); 4507 break; 4508 case NVPTXISD::Suld1DArrayV4I8Zero: 4509 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; 4510 Ops.push_back(TexHandle); 4511 Ops.push_back(N->getOperand(2)); 4512 Ops.push_back(N->getOperand(3)); 4513 Ops.push_back(Chain); 4514 break; 4515 case NVPTXISD::Suld1DArrayV4I16Zero: 4516 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; 4517 Ops.push_back(TexHandle); 4518 Ops.push_back(N->getOperand(2)); 4519 Ops.push_back(N->getOperand(3)); 4520 Ops.push_back(Chain); 4521 break; 4522 case NVPTXISD::Suld1DArrayV4I32Zero: 4523 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; 4524 Ops.push_back(TexHandle); 4525 Ops.push_back(N->getOperand(2)); 4526 Ops.push_back(N->getOperand(3)); 4527 Ops.push_back(Chain); 4528 break; 4529 case NVPTXISD::Suld2DI8Zero: 4530 Opc = NVPTX::SULD_2D_I8_ZERO; 4531 Ops.push_back(TexHandle); 4532 Ops.push_back(N->getOperand(2)); 4533 Ops.push_back(N->getOperand(3)); 4534 Ops.push_back(Chain); 4535 break; 4536 case NVPTXISD::Suld2DI16Zero: 4537 Opc = NVPTX::SULD_2D_I16_ZERO; 4538 Ops.push_back(TexHandle); 4539 Ops.push_back(N->getOperand(2)); 4540 Ops.push_back(N->getOperand(3)); 4541 Ops.push_back(Chain); 4542 break; 4543 case NVPTXISD::Suld2DI32Zero: 4544 Opc = NVPTX::SULD_2D_I32_ZERO; 4545 Ops.push_back(TexHandle); 4546 Ops.push_back(N->getOperand(2)); 4547 Ops.push_back(N->getOperand(3)); 4548 Ops.push_back(Chain); 4549 break; 4550 case NVPTXISD::Suld2DI64Zero: 4551 Opc = NVPTX::SULD_2D_I64_ZERO; 4552 Ops.push_back(TexHandle); 4553 Ops.push_back(N->getOperand(2)); 4554 Ops.push_back(N->getOperand(3)); 4555 Ops.push_back(Chain); 4556 break; 4557 case NVPTXISD::Suld2DV2I8Zero: 4558 Opc = NVPTX::SULD_2D_V2I8_ZERO; 4559 Ops.push_back(TexHandle); 4560 Ops.push_back(N->getOperand(2)); 4561 Ops.push_back(N->getOperand(3)); 4562 Ops.push_back(Chain); 4563 break; 4564 case NVPTXISD::Suld2DV2I16Zero: 4565 Opc = NVPTX::SULD_2D_V2I16_ZERO; 4566 Ops.push_back(TexHandle); 4567 Ops.push_back(N->getOperand(2)); 4568 Ops.push_back(N->getOperand(3)); 4569 Ops.push_back(Chain); 4570 break; 4571 case NVPTXISD::Suld2DV2I32Zero: 4572 Opc = NVPTX::SULD_2D_V2I32_ZERO; 4573 Ops.push_back(TexHandle); 4574 Ops.push_back(N->getOperand(2)); 4575 Ops.push_back(N->getOperand(3)); 4576 Ops.push_back(Chain); 4577 break; 4578 case NVPTXISD::Suld2DV2I64Zero: 4579 Opc = NVPTX::SULD_2D_V2I64_ZERO; 4580 Ops.push_back(TexHandle); 4581 Ops.push_back(N->getOperand(2)); 4582 Ops.push_back(N->getOperand(3)); 4583 Ops.push_back(Chain); 4584 break; 4585 case NVPTXISD::Suld2DV4I8Zero: 4586 Opc = NVPTX::SULD_2D_V4I8_ZERO; 4587 Ops.push_back(TexHandle); 4588 Ops.push_back(N->getOperand(2)); 4589 Ops.push_back(N->getOperand(3)); 4590 Ops.push_back(Chain); 4591 break; 4592 case NVPTXISD::Suld2DV4I16Zero: 4593 Opc = NVPTX::SULD_2D_V4I16_ZERO; 4594 Ops.push_back(TexHandle); 4595 Ops.push_back(N->getOperand(2)); 4596 Ops.push_back(N->getOperand(3)); 4597 Ops.push_back(Chain); 4598 break; 4599 case NVPTXISD::Suld2DV4I32Zero: 4600 Opc = NVPTX::SULD_2D_V4I32_ZERO; 4601 Ops.push_back(TexHandle); 4602 Ops.push_back(N->getOperand(2)); 4603 Ops.push_back(N->getOperand(3)); 4604 Ops.push_back(Chain); 4605 break; 4606 case NVPTXISD::Suld2DArrayI8Zero: 4607 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; 4608 Ops.push_back(TexHandle); 4609 Ops.push_back(N->getOperand(2)); 4610 Ops.push_back(N->getOperand(3)); 4611 Ops.push_back(N->getOperand(4)); 4612 Ops.push_back(Chain); 4613 break; 4614 case NVPTXISD::Suld2DArrayI16Zero: 4615 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; 4616 Ops.push_back(TexHandle); 4617 Ops.push_back(N->getOperand(2)); 4618 Ops.push_back(N->getOperand(3)); 4619 Ops.push_back(N->getOperand(4)); 4620 Ops.push_back(Chain); 4621 break; 4622 case NVPTXISD::Suld2DArrayI32Zero: 4623 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; 4624 Ops.push_back(TexHandle); 4625 Ops.push_back(N->getOperand(2)); 4626 Ops.push_back(N->getOperand(3)); 4627 Ops.push_back(N->getOperand(4)); 4628 Ops.push_back(Chain); 4629 break; 4630 case NVPTXISD::Suld2DArrayI64Zero: 4631 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; 4632 Ops.push_back(TexHandle); 4633 Ops.push_back(N->getOperand(2)); 4634 Ops.push_back(N->getOperand(3)); 4635 Ops.push_back(N->getOperand(4)); 4636 Ops.push_back(Chain); 4637 break; 4638 case NVPTXISD::Suld2DArrayV2I8Zero: 4639 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; 4640 Ops.push_back(TexHandle); 4641 Ops.push_back(N->getOperand(2)); 4642 Ops.push_back(N->getOperand(3)); 4643 Ops.push_back(N->getOperand(4)); 4644 Ops.push_back(Chain); 4645 break; 4646 case NVPTXISD::Suld2DArrayV2I16Zero: 4647 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; 4648 Ops.push_back(TexHandle); 4649 Ops.push_back(N->getOperand(2)); 4650 Ops.push_back(N->getOperand(3)); 4651 Ops.push_back(N->getOperand(4)); 4652 Ops.push_back(Chain); 4653 break; 4654 case NVPTXISD::Suld2DArrayV2I32Zero: 4655 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; 4656 Ops.push_back(TexHandle); 4657 Ops.push_back(N->getOperand(2)); 4658 Ops.push_back(N->getOperand(3)); 4659 Ops.push_back(N->getOperand(4)); 4660 Ops.push_back(Chain); 4661 break; 4662 case NVPTXISD::Suld2DArrayV2I64Zero: 4663 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; 4664 Ops.push_back(TexHandle); 4665 Ops.push_back(N->getOperand(2)); 4666 Ops.push_back(N->getOperand(3)); 4667 Ops.push_back(N->getOperand(4)); 4668 Ops.push_back(Chain); 4669 break; 4670 case NVPTXISD::Suld2DArrayV4I8Zero: 4671 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; 4672 Ops.push_back(TexHandle); 4673 Ops.push_back(N->getOperand(2)); 4674 Ops.push_back(N->getOperand(3)); 4675 Ops.push_back(N->getOperand(4)); 4676 Ops.push_back(Chain); 4677 break; 4678 case NVPTXISD::Suld2DArrayV4I16Zero: 4679 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; 4680 Ops.push_back(TexHandle); 4681 Ops.push_back(N->getOperand(2)); 4682 Ops.push_back(N->getOperand(3)); 4683 Ops.push_back(N->getOperand(4)); 4684 Ops.push_back(Chain); 4685 break; 4686 case NVPTXISD::Suld2DArrayV4I32Zero: 4687 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; 4688 Ops.push_back(TexHandle); 4689 Ops.push_back(N->getOperand(2)); 4690 Ops.push_back(N->getOperand(3)); 4691 Ops.push_back(N->getOperand(4)); 4692 Ops.push_back(Chain); 4693 break; 4694 case NVPTXISD::Suld3DI8Zero: 4695 Opc = NVPTX::SULD_3D_I8_ZERO; 4696 Ops.push_back(TexHandle); 4697 Ops.push_back(N->getOperand(2)); 4698 Ops.push_back(N->getOperand(3)); 4699 Ops.push_back(N->getOperand(4)); 4700 Ops.push_back(Chain); 4701 break; 4702 case NVPTXISD::Suld3DI16Zero: 4703 Opc = NVPTX::SULD_3D_I16_ZERO; 4704 Ops.push_back(TexHandle); 4705 Ops.push_back(N->getOperand(2)); 4706 Ops.push_back(N->getOperand(3)); 4707 Ops.push_back(N->getOperand(4)); 4708 Ops.push_back(Chain); 4709 break; 4710 case NVPTXISD::Suld3DI32Zero: 4711 Opc = NVPTX::SULD_3D_I32_ZERO; 4712 Ops.push_back(TexHandle); 4713 Ops.push_back(N->getOperand(2)); 4714 Ops.push_back(N->getOperand(3)); 4715 Ops.push_back(N->getOperand(4)); 4716 Ops.push_back(Chain); 4717 break; 4718 case NVPTXISD::Suld3DI64Zero: 4719 Opc = NVPTX::SULD_3D_I64_ZERO; 4720 Ops.push_back(TexHandle); 4721 Ops.push_back(N->getOperand(2)); 4722 Ops.push_back(N->getOperand(3)); 4723 Ops.push_back(N->getOperand(4)); 4724 Ops.push_back(Chain); 4725 break; 4726 case NVPTXISD::Suld3DV2I8Zero: 4727 Opc = NVPTX::SULD_3D_V2I8_ZERO; 4728 Ops.push_back(TexHandle); 4729 Ops.push_back(N->getOperand(2)); 4730 Ops.push_back(N->getOperand(3)); 4731 Ops.push_back(N->getOperand(4)); 4732 Ops.push_back(Chain); 4733 break; 4734 case NVPTXISD::Suld3DV2I16Zero: 4735 Opc = NVPTX::SULD_3D_V2I16_ZERO; 4736 Ops.push_back(TexHandle); 4737 Ops.push_back(N->getOperand(2)); 4738 Ops.push_back(N->getOperand(3)); 4739 Ops.push_back(N->getOperand(4)); 4740 Ops.push_back(Chain); 4741 break; 4742 case NVPTXISD::Suld3DV2I32Zero: 4743 Opc = NVPTX::SULD_3D_V2I32_ZERO; 4744 Ops.push_back(TexHandle); 4745 Ops.push_back(N->getOperand(2)); 4746 Ops.push_back(N->getOperand(3)); 4747 Ops.push_back(N->getOperand(4)); 4748 Ops.push_back(Chain); 4749 break; 4750 case NVPTXISD::Suld3DV2I64Zero: 4751 Opc = NVPTX::SULD_3D_V2I64_ZERO; 4752 Ops.push_back(TexHandle); 4753 Ops.push_back(N->getOperand(2)); 4754 Ops.push_back(N->getOperand(3)); 4755 Ops.push_back(N->getOperand(4)); 4756 Ops.push_back(Chain); 4757 break; 4758 case NVPTXISD::Suld3DV4I8Zero: 4759 Opc = NVPTX::SULD_3D_V4I8_ZERO; 4760 Ops.push_back(TexHandle); 4761 Ops.push_back(N->getOperand(2)); 4762 Ops.push_back(N->getOperand(3)); 4763 Ops.push_back(N->getOperand(4)); 4764 Ops.push_back(Chain); 4765 break; 4766 case NVPTXISD::Suld3DV4I16Zero: 4767 Opc = NVPTX::SULD_3D_V4I16_ZERO; 4768 Ops.push_back(TexHandle); 4769 Ops.push_back(N->getOperand(2)); 4770 Ops.push_back(N->getOperand(3)); 4771 Ops.push_back(N->getOperand(4)); 4772 Ops.push_back(Chain); 4773 break; 4774 case NVPTXISD::Suld3DV4I32Zero: 4775 Opc = NVPTX::SULD_3D_V4I32_ZERO; 4776 Ops.push_back(TexHandle); 4777 Ops.push_back(N->getOperand(2)); 4778 Ops.push_back(N->getOperand(3)); 4779 Ops.push_back(N->getOperand(4)); 4780 Ops.push_back(Chain); 4781 break; 4782 } 4783 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 4784 return Ret; 4785} 4786 4787 4788/// SelectBFE - Look for instruction sequences that can be made more efficient 4789/// by using the 'bfe' (bit-field extract) PTX instruction 4790SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { 4791 SDLoc DL(N); 4792 SDValue LHS = N->getOperand(0); 4793 SDValue RHS = N->getOperand(1); 4794 SDValue Len; 4795 SDValue Start; 4796 SDValue Val; 4797 bool IsSigned = false; 4798 4799 if (N->getOpcode() == ISD::AND) { 4800 // Canonicalize the operands 4801 // We want 'and %val, %mask' 4802 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) { 4803 std::swap(LHS, RHS); 4804 } 4805 4806 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS); 4807 if (!Mask) { 4808 // We need a constant mask on the RHS of the AND 4809 return NULL; 4810 } 4811 4812 // Extract the mask bits 4813 uint64_t MaskVal = Mask->getZExtValue(); 4814 if (!isMask_64(MaskVal)) { 4815 // We *could* handle shifted masks here, but doing so would require an 4816 // 'and' operation to fix up the low-order bits so we would trade 4817 // shr+and for bfe+and, which has the same throughput 4818 return NULL; 4819 } 4820 4821 // How many bits are in our mask? 4822 uint64_t NumBits = countTrailingOnes(MaskVal); 4823 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); 4824 4825 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { 4826 // We have a 'srl/and' pair, extract the effective start bit and length 4827 Val = LHS.getNode()->getOperand(0); 4828 Start = LHS.getNode()->getOperand(1); 4829 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start); 4830 if (StartConst) { 4831 uint64_t StartVal = StartConst->getZExtValue(); 4832 // How many "good" bits do we have left? "good" is defined here as bits 4833 // that exist in the original value, not shifted in. 4834 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal; 4835 if (NumBits > GoodBits) { 4836 // Do not handle the case where bits have been shifted in. In theory 4837 // we could handle this, but the cost is likely higher than just 4838 // emitting the srl/and pair. 4839 return NULL; 4840 } 4841 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32); 4842 } else { 4843 // Do not handle the case where the shift amount (can be zero if no srl 4844 // was found) is not constant. We could handle this case, but it would 4845 // require run-time logic that would be more expensive than just 4846 // emitting the srl/and pair. 4847 return NULL; 4848 } 4849 } else { 4850 // Do not handle the case where the LHS of the and is not a shift. While 4851 // it would be trivial to handle this case, it would just transform 4852 // 'and' -> 'bfe', but 'and' has higher-throughput. 4853 return NULL; 4854 } 4855 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { 4856 if (LHS->getOpcode() == ISD::AND) { 4857 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS); 4858 if (!ShiftCnst) { 4859 // Shift amount must be constant 4860 return NULL; 4861 } 4862 4863 uint64_t ShiftAmt = ShiftCnst->getZExtValue(); 4864 4865 SDValue AndLHS = LHS->getOperand(0); 4866 SDValue AndRHS = LHS->getOperand(1); 4867 4868 // Canonicalize the AND to have the mask on the RHS 4869 if (isa<ConstantSDNode>(AndLHS)) { 4870 std::swap(AndLHS, AndRHS); 4871 } 4872 4873 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS); 4874 if (!MaskCnst) { 4875 // Mask must be constant 4876 return NULL; 4877 } 4878 4879 uint64_t MaskVal = MaskCnst->getZExtValue(); 4880 uint64_t NumZeros; 4881 uint64_t NumBits; 4882 if (isMask_64(MaskVal)) { 4883 NumZeros = 0; 4884 // The number of bits in the result bitfield will be the number of 4885 // trailing ones (the AND) minus the number of bits we shift off 4886 NumBits = countTrailingOnes(MaskVal) - ShiftAmt; 4887 } else if (isShiftedMask_64(MaskVal)) { 4888 NumZeros = countTrailingZeros(MaskVal); 4889 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); 4890 // The number of bits in the result bitfield will be the number of 4891 // trailing zeros plus the number of set bits in the mask minus the 4892 // number of bits we shift off 4893 NumBits = NumZeros + NumOnes - ShiftAmt; 4894 } else { 4895 // This is not a mask we can handle 4896 return NULL; 4897 } 4898 4899 if (ShiftAmt < NumZeros) { 4900 // Handling this case would require extra logic that would make this 4901 // transformation non-profitable 4902 return NULL; 4903 } 4904 4905 Val = AndLHS; 4906 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); 4907 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); 4908 } else if (LHS->getOpcode() == ISD::SHL) { 4909 // Here, we have a pattern like: 4910 // 4911 // (sra (shl val, NN), MM) 4912 // or 4913 // (srl (shl val, NN), MM) 4914 // 4915 // If MM >= NN, we can efficiently optimize this with bfe 4916 Val = LHS->getOperand(0); 4917 4918 SDValue ShlRHS = LHS->getOperand(1); 4919 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS); 4920 if (!ShlCnst) { 4921 // Shift amount must be constant 4922 return NULL; 4923 } 4924 uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); 4925 4926 SDValue ShrRHS = RHS; 4927 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS); 4928 if (!ShrCnst) { 4929 // Shift amount must be constant 4930 return NULL; 4931 } 4932 uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); 4933 4934 // To avoid extra codegen and be profitable, we need Outer >= Inner 4935 if (OuterShiftAmt < InnerShiftAmt) { 4936 return NULL; 4937 } 4938 4939 // If the outer shift is more than the type size, we have no bitfield to 4940 // extract (since we also check that the inner shift is <= the outer shift 4941 // then this also implies that the inner shift is < the type size) 4942 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) { 4943 return NULL; 4944 } 4945 4946 Start = 4947 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32); 4948 Len = 4949 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() - 4950 OuterShiftAmt, DL, MVT::i32); 4951 4952 if (N->getOpcode() == ISD::SRA) { 4953 // If we have a arithmetic right shift, we need to use the signed bfe 4954 // variant 4955 IsSigned = true; 4956 } 4957 } else { 4958 // No can do... 4959 return NULL; 4960 } 4961 } else { 4962 // No can do... 4963 return NULL; 4964 } 4965 4966 4967 unsigned Opc; 4968 // For the BFE operations we form here from "and" and "srl", always use the 4969 // unsigned variants. 4970 if (Val.getValueType() == MVT::i32) { 4971 if (IsSigned) { 4972 Opc = NVPTX::BFE_S32rii; 4973 } else { 4974 Opc = NVPTX::BFE_U32rii; 4975 } 4976 } else if (Val.getValueType() == MVT::i64) { 4977 if (IsSigned) { 4978 Opc = NVPTX::BFE_S64rii; 4979 } else { 4980 Opc = NVPTX::BFE_U64rii; 4981 } 4982 } else { 4983 // We cannot handle this type 4984 return NULL; 4985 } 4986 4987 SDValue Ops[] = { 4988 Val, Start, Len 4989 }; 4990 4991 return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops); 4992} 4993 4994// SelectDirectAddr - Match a direct address for DAG. 4995// A direct address could be a globaladdress or externalsymbol. 4996bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { 4997 // Return true if TGA or ES. 4998 if (N.getOpcode() == ISD::TargetGlobalAddress || 4999 N.getOpcode() == ISD::TargetExternalSymbol) { 5000 Address = N; 5001 return true; 5002 } 5003 if (N.getOpcode() == NVPTXISD::Wrapper) { 5004 Address = N.getOperand(0); 5005 return true; 5006 } 5007 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { 5008 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue(); 5009 if (IID == Intrinsic::nvvm_ptr_gen_to_param) 5010 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam) 5011 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address)); 5012 } 5013 return false; 5014} 5015 5016// symbol+offset 5017bool NVPTXDAGToDAGISel::SelectADDRsi_imp( 5018 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 5019 if (Addr.getOpcode() == ISD::ADD) { 5020 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 5021 SDValue base = Addr.getOperand(0); 5022 if (SelectDirectAddr(base, Base)) { 5023 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), 5024 mvt); 5025 return true; 5026 } 5027 } 5028 } 5029 return false; 5030} 5031 5032// symbol+offset 5033bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, 5034 SDValue &Base, SDValue &Offset) { 5035 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); 5036} 5037 5038// symbol+offset 5039bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, 5040 SDValue &Base, SDValue &Offset) { 5041 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); 5042} 5043 5044// register+offset 5045bool NVPTXDAGToDAGISel::SelectADDRri_imp( 5046 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 5047 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 5048 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 5049 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt); 5050 return true; 5051 } 5052 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 5053 Addr.getOpcode() == ISD::TargetGlobalAddress) 5054 return false; // direct calls. 5055 5056 if (Addr.getOpcode() == ISD::ADD) { 5057 if (SelectDirectAddr(Addr.getOperand(0), Addr)) { 5058 return false; 5059 } 5060 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 5061 if (FrameIndexSDNode *FIN = 5062 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) 5063 // Constant offset from frame ref. 5064 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 5065 else 5066 Base = Addr.getOperand(0); 5067 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), 5068 mvt); 5069 return true; 5070 } 5071 } 5072 return false; 5073} 5074 5075// register+offset 5076bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, 5077 SDValue &Base, SDValue &Offset) { 5078 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); 5079} 5080 5081// register+offset 5082bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, 5083 SDValue &Base, SDValue &Offset) { 5084 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); 5085} 5086 5087bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, 5088 unsigned int spN) const { 5089 const Value *Src = nullptr; 5090 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { 5091 if (spN == 0 && mN->getMemOperand()->getPseudoValue()) 5092 return true; 5093 Src = mN->getMemOperand()->getValue(); 5094 } 5095 if (!Src) 5096 return false; 5097 if (auto *PT = dyn_cast<PointerType>(Src->getType())) 5098 return (PT->getAddressSpace() == spN); 5099 return false; 5100} 5101 5102/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 5103/// inline asm expressions. 5104bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( 5105 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 5106 SDValue Op0, Op1; 5107 switch (ConstraintID) { 5108 default: 5109 return true; 5110 case InlineAsm::Constraint_m: // memory 5111 if (SelectDirectAddr(Op, Op0)) { 5112 OutOps.push_back(Op0); 5113 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); 5114 return false; 5115 } 5116 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { 5117 OutOps.push_back(Op0); 5118 OutOps.push_back(Op1); 5119 return false; 5120 } 5121 break; 5122 } 5123 return true; 5124} 5125