// NVPTXIntrinsics.td revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the constant's
// APFloat value to a host float/double and compares it against 0 or 1 with
// the C++ `==` operator.
def immFloat0 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;



//-----------------------------------
// Synchronization Functions
//-----------------------------------

// Both the CUDA and the NVVM barrier intrinsic emit the same "bar.sync 0".
def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_cuda_syncthreads)]>;
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;

// The reducing barriers take a 32-bit $pred. The emitted scope declares a
// local predicate %p1, sets it with "setp.ne.u32 %p1, $pred, 0" and feeds it
// to bar.red on barrier 0.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
  !strconcat(".reg .pred \t%p1; \n\t",
  !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
  !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
  !strconcat("}}", ""))))),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// The .and/.or forms produce a predicate (%p2); selp.u32 turns it back into
// a 32-bit 1/0 value in $dst.
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
  !strconcat(".reg .pred \t%p1; \n\t",
  !strconcat(".reg .pred \t%p2; \n\t",
  !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
  !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
  !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
  !strconcat("}}", ""))))))),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
  !strconcat(".reg .pred \t%p1; \n\t",
  !strconcat(".reg .pred \t%p2; \n\t",
  !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
  !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
  !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
  !strconcat("}}", ""))))))),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;


//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// Operand-less instruction that prints StrOp and is selected for IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x
// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// fmin(1.0, fmax(0.0, x)) in every operand order of the two commutative
// intrinsics is selected as a saturating same-type cvt (CvtSAT).
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// One-source intrinsic instruction: prints OpcStr verbatim and selects
// (IntOP $src0) into $dst.
// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  NVPTXRegClass src_regclass, Intrinsic IntOP>
            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
            OpcStr,
        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Two-source intrinsic instruction: prints OpcStr verbatim and selects
// (IntOP $src0, $src1) into $dst.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
            : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
            OpcStr,
        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source intrinsic instruction: prints OpcStr verbatim and selects
// (IntOP $src0, $src1, $src2) into $dst.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
  NVPTXRegClass s2_regclass, Intrinsic IntOP>
            : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
            OpcStr,
        [(set t_regclass:$dst,
          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;

//
// MISC
//

// Note: the 64-bit clz/popc forms take an Int64Regs source but still write an
// Int32Regs destination.
def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
  int_nvvm_clz_i>;
def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
  int_nvvm_clz_ll>;

def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
  int_nvvm_popc_i>;
def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
  int_nvvm_popc_ll>;

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_min_ui>;

def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_min_ull>;

def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_max_ui>;

def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_max_ull>;

def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
  Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
  Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Multiplication
//

def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Brev
//

def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
  int_nvvm_brev32>;
def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
  int_nvvm_brev64>;

//
// Sad
//

def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor Ceil
//

// floor/ceil/round/trunc have no instruction of their own here; they are
// selected as same-type CVT instructions with the CvtRMI / CvtRPI / CvtRNI /
// CvtRZI mode operand (plus the _FTZ variant for the ftz f32 intrinsics).
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
  int_nvvm_abs_i>;
def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
  int_nvvm_abs_ll>;

def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_fabs_d>;

//
// Round
//

def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2 Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;

def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f is mapped onto one of the four instructions
// above, depending on which of the doF32FTZ / do_SQRTF32_RN predicates hold;
// the last, unpredicated pattern falls back to sqrt.approx.f32.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// The conversion intrinsics are selected as CVT_<dst>_<src> instructions; the
// rounding suffix of the intrinsic picks the Cvt* mode operand. Float-to-int
// conversions use the integer-rounding modes (CvtRNI/RZI/RMI/RPI), the others
// use CvtRN/RZ/RM/RP.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Packs two 32-bit registers into one 64-bit float register with a vector mov.
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpack a 64-bit float register with a vector mov; the half that is not
// wanted lands in the scope-local scratch register %temp.
def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
                       !strconcat(".reg .b32 %temp; \n\t",
             !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
               "}}"))),
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
                       !strconcat(".reg .b32 %temp; \n\t",
             !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
               "}}"))),
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// FIXME: Ideally, we could use these patterns instead of the scope-creating
// patterns, but ptxas does not like these since .s16 is not compatible with
// .f16.  The solution is to use .bXX for all integer register types, but we
// are not there yet.
//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
//def : Pat<(int_nvvm_h2f Int16Regs:$a),
//          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;

// Scope-creating half <-> float conversions: the f16 value is staged in a
// scope-local .b16 register %temp and moved to/from the 16-bit integer
// register with mov.b16.
def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
                                   !strconcat(".reg .b16 %temp;\n\t",
           !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
           !strconcat("mov.b16 \t$dst, %temp;\n",
             "}}")))),
                                   Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
                                   !strconcat(".reg .b16 %temp;\n\t",
           !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
           !strconcat("mov.b16 \t$dst, %temp;\n",
             "}}")))),
           Int16Regs, Float32Regs, int_nvvm_f2h_rn>;

def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
                            !strconcat(".reg .b16 %temp;\n\t",
          !strconcat("mov.b16 \t%temp, $src0;\n\t",
          !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
            "}}")))),
          Float32Regs, Int16Regs, int_nvvm_h2f>;

// Target-independent f16 conversion nodes. The doF32FTZ-predicated pattern
// selects the flush-to-zero rounding mode for f32_to_f16.
def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

//
// Bitcast
//

def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
  Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
  Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
  Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
  Float64Regs, int_nvvm_bitcast_d2ll>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// Pattern fragments whose predicate passes the matched node and one
// llvm::ADDRESS_SPACE_* constant to ChkMemSDNodeAddressSpace.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
}]>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
}]>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
}]>;

// Two-operand atomic (address + value). Defines a register-operand variant
// (`reg`) and an immediate-operand variant (`imm`), both guarded by Pred.
// The printed instruction is "atom" # SpaceStr # OpcStr # TypeStr followed by
// " \t$dst, [$addr], $b;".
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
               !strconcat("atom",
         !strconcat(SpaceStr,
         !strconcat(OpcStr,
         !strconcat(TypeStr,
         !strconcat(" \t$dst, [$addr], $b;", ""))))),
         [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<[Pred]>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
               !strconcat("atom",
         !strconcat(SpaceStr,
         !strconcat(OpcStr,
         !strconcat(TypeStr,
         !strconcat(" \t$dst, [$addr], $b;", ""))))),
         [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<[Pred]>;
}
multiclass F_ATOMIC_2<NVPTXRegClass
regclass, string SpaceStr, string TypeStr, 854 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> { 855 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 856 IntOp, IMMType, IMM, Pred>; 857 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 858 IntOp, IMMType, IMM, Pred>; 859} 860 861// has 2 operands, neg the second one 862multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 863 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 864 Operand IMMType, Predicate Pred> { 865 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), 866 !strconcat("{{ \n\t", 867 !strconcat(".reg \t.s", 868 !strconcat(TypeStr, 869 !strconcat(" temp; \n\t", 870 !strconcat("neg.s", 871 !strconcat(TypeStr, 872 !strconcat(" \ttemp, $b; \n\t", 873 !strconcat("atom", 874 !strconcat(SpaceStr, 875 !strconcat(OpcStr, 876 !strconcat(".u", 877 !strconcat(TypeStr, 878 !strconcat(" \t$dst, [$addr], temp; \n\t", 879 !strconcat("}}", "")))))))))))))), 880 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, 881 Requires<[Pred]>; 882} 883multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr, 884 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, 885 Predicate Pred> { 886 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 887 IntOp, IMMType, Pred> ; 888 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 889 IntOp, IMMType, Pred> ; 890} 891 892// has 3 operands 893multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 894 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 895 Operand IMMType, Predicate Pred> { 896 def reg : NVPTXInst<(outs regclass:$dst), 897 (ins ptrclass:$addr, regclass:$b, regclass:$c), 898 !strconcat("atom", 899 !strconcat(SpaceStr, 900 !strconcat(OpcStr, 901 !strconcat(TypeStr, 902 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 903 [(set 
regclass:$dst, 904 (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>, 905 Requires<[Pred]>; 906 def imm1 : NVPTXInst<(outs regclass:$dst), 907 (ins ptrclass:$addr, IMMType:$b, regclass:$c), 908 !strconcat("atom", 909 !strconcat(SpaceStr, 910 !strconcat(OpcStr, 911 !strconcat(TypeStr, 912 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 913 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>, 914 Requires<[Pred]>; 915 def imm2 : NVPTXInst<(outs regclass:$dst), 916 (ins ptrclass:$addr, regclass:$b, IMMType:$c), 917 !strconcat("atom", 918 !strconcat(SpaceStr, 919 !strconcat(OpcStr, 920 !strconcat(TypeStr, 921 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 922 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>, 923 Requires<[Pred]>; 924 def imm3 : NVPTXInst<(outs regclass:$dst), 925 (ins ptrclass:$addr, IMMType:$b, IMMType:$c), 926 !strconcat("atom", 927 !strconcat(SpaceStr, 928 !strconcat(OpcStr, 929 !strconcat(TypeStr, 930 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 931 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>, 932 Requires<[Pred]>; 933} 934multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr, 935 string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> { 936 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 937 IntOp, IMMType, Pred>; 938 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 939 IntOp, IMMType, Pred>; 940} 941 942// atom_add 943 944def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 945 (atomic_load_add_32 node:$a, node:$b)>; 946def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 947 (atomic_load_add_32 node:$a, node:$b)>; 948def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 949 (atomic_load_add_32 node:$a, node:$b)>; 950def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 951 (atomic_load_add_64 node:$a, node:$b)>; 952def atomic_load_add_64_s: 
ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 953 (atomic_load_add_64 node:$a, node:$b)>; 954def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 955 (atomic_load_add_64 node:$a, node:$b)>; 956def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 957 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; 958def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 959 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; 960def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 961 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; 962 963defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add", 964 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>; 965defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add", 966 atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>; 967defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add", 968 atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>; 969defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 970 ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>; 971 972defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add", 973 atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>; 974defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add", 975 atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>; 976defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add", 977 atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>; 978defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64", 979 ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>; 980 981defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add", 982 atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>; 983defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add", 984 atomic_load_add_f32_s, f32imm, fpimm, 
hasAtomAddF32>; 985defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add", 986 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>; 987 988// atom_sub 989 990def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 991 (atomic_load_sub_32 node:$a, node:$b)>; 992def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 993 (atomic_load_sub_32 node:$a, node:$b)>; 994def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 995 (atomic_load_sub_32 node:$a, node:$b)>; 996def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 997 (atomic_load_sub_64 node:$a, node:$b)>; 998def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 999 (atomic_load_sub_64 node:$a, node:$b)>; 1000def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1001 (atomic_load_sub_64 node:$a, node:$b)>; 1002 1003defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", 1004 atomic_load_sub_32_g, i32imm, hasAtomRedG32>; 1005defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", 1006 atomic_load_sub_64_g, i64imm, hasAtomRedG64>; 1007defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add", 1008 atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>; 1009defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", 1010 ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>; 1011defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add", 1012 atomic_load_sub_32_s, i32imm, hasAtomRedS32>; 1013defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add", 1014 atomic_load_sub_64_s, i64imm, hasAtomRedS64>; 1015defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add", 1016 atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>; 1017defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", 1018 ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>; 1019 
// atom_swap
//
// Atomic exchange.  Each PatFrag wraps the target-independent
// atomic_swap_{32,64} node in one of the ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK
// address-space predicates, so that the matching F_ATOMIC_2 instantiation
// below is only selected for a memory node in that address space.
// F_ATOMIC_2 assembles the mnemonic as "atom" # SpaceStr # OpcStr # TypeStr,
// i.e. "atom.global.exch.b32" for the first defm.  The *_USE_G variants match
// the generic-space fragment but print ".global"; they are gated on the
// useAtomRedG{32,64}forGen{32,64} predicates.

def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
  atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
  atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
  atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
  atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
  atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
  atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_max
//
// Signed (atomic_load_max_*) and unsigned (atomic_load_umax_*) atomic maximum.
// Both map to the ".max" opcode; signedness is carried by the type suffix
// passed as TypeStr (".s32"/".s64" versus ".u32"/".u64").
//
// NOTE(review): the PTX ISA lists 64-bit atom.min/max as requiring sm_32 or
// higher.  The 64-bit instantiations here are gated only on the generic 64-bit
// atomics predicates (hasAtomRedG64 / hasAtomRedS64 / hasAtomRedGen64), whose
// definitions are not in this file -- confirm they are strict enough.

def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
  atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
  atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
  atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
  atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_min
//
// Signed (atomic_load_min_*) and unsigned (atomic_load_umin_*) atomic minimum.
// Same scheme as atom_max: one address-space-checked PatFrag per
// (width, space) pair, and the ".min" opcode with an ".sNN" or ".uNN" type
// suffix selecting signedness.

def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
  atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
// atom_min (remaining instantiations)
//
// Generic-space 64-bit signed minimum, followed by all unsigned-minimum
// variants.  The atomic_load_min_* / atomic_load_umin_* fragments referenced
// here are the address-space-checked PatFrags defined earlier in this file.
//
// NOTE(review): the PTX ISA lists 64-bit atom.min/max/and/or/xor as requiring
// sm_32 or higher.  The 64-bit instantiations in this section are gated only
// on the generic 64-bit atomics predicates (hasAtomRedG64 / hasAtomRedS64 /
// hasAtomRedGen64), whose definitions are not in this file -- confirm they are
// strict enough.
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
  atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
  atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
  atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_inc atom_dec
//
// Unlike the other atomics in this file, these fragments wrap NVVM intrinsics
// (int_nvvm_atomic_load_{inc,dec}_32) rather than target-independent
// atomic_* nodes.  Only 32-bit forms are instantiated.

def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
  atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
  atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
  atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
  atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
  atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
  atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_and
//
// Bitwise AND; uses the untyped ".b32"/".b64" type suffixes.

def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
  atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
  atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
  atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
  atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
  atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
  atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_or
//
// Bitwise OR.  The instantiations are listed in G / GEN / GEN_USE_G / S order
// (different from the neighbouring sections); the order is kept as-is.

def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
  atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
  atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
  atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
  atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
  atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
  atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;

// atom_xor
//
// Bitwise XOR; same scheme as atom_and.

def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
  atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
  atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
  atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
  atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
  atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
  atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_cas
//
// Compare-and-swap.  The fragments take three operands (address, $b, $c) and
// are instantiated through F_ATOMIC_3, which prints
// "atom<space>.cas.bNN \t$dst, [$addr], $b, $c;" and provides register and
// immediate forms for $b and $c.

def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
  atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
  atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
  atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
  ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
  atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
  atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
  atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;


//-----------------------------------
// Read Special Registers
//-----------------------------------

// F_SREG - An instruction with no inputs that writes one result register.
//   OpStr       - complete assembly string to print (references $dst).
//   regclassOut - register class of the $dst result.
//   IntOp       - zero-operand intrinsic this instruction is selected for.
class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
      NVPTXInst<(outs regclassOut:$dst), (ins),
                OpStr,
                [(set regclassOut:$dst, (IntOp))]>;

// %tid.{x,y,z}
def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
  int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
  int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
  int_nvvm_read_ptx_sreg_tid_z>;

// %ntid.{x,y,z}
def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
  int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
  int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
  int_nvvm_read_ptx_sreg_ntid_z>;

// Reads of the %ctaid.{x,y,z} special registers.
def INT_PTX_SREG_CTAID_X
  : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
           int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
  : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
           int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
  : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
           int_nvvm_read_ptx_sreg_ctaid_z>;

// Reads of the %nctaid.{x,y,z} special registers.
def INT_PTX_SREG_NCTAID_X
  : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
           int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
  : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
           int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
  : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
           int_nvvm_read_ptx_sreg_nctaid_z>;

// Read of WARP_SZ.
def INT_PTX_SREG_WARPSIZE
  : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
           int_nvvm_read_ptx_sreg_warpsize>;


//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Scalar ldu.global.
// One instruction is defined per address-operand kind (32/64-bit register,
// imemAny, MEMri, MEMri64).  TyStr is appended to "ldu.global." and carries the
// type suffix plus the operand text.  All pattern lists are empty, so these
// defs contribute no selection patterns of their own.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result),
                (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
                []>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result),
                (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr),
                []>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result),
                (ins imemAny:$src),
                !strconcat("ldu.global.", TyStr),
                []>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result),
                (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr),
                []>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result),
                (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr),
                []>,
      Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu: every vector element has its own destination
// register ($dst1..$dstN).  No Requires<> clause and no patterns are attached.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
}

multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                !strconcat("ldu.global.", TyStr),
                []>;
}

defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Scalar ld.global.nc.  Same shape as LDU_G: one def per address-operand kind,
// TyStr appended to "ld.global.nc.", empty pattern lists, gated on hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result),
                (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result),
                (ins Int64Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result),
                (ins imemAny:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result),
                (ins MEMri:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result),
                (ins MEMri64:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>,
      Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg: every vector element has its own destination
// register ($dst1..$dstN).  No Requires<> clause and no patterns are attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
}

multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                !strconcat("ld.global.nc.", TyStr),
                []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// Address conversion, non-generic -> generic, for intrinsic Intrin.
// The _yes/_yes_64 forms emit "cvta.<Str>.u32/.u64" and are gated on
// hasGenericLdSt; the _no/_no_64 forms match the same intrinsic and emit a
// plain mov.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result),
                (ins Int32Regs:$src),
                !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[hasGenericLdSt]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result),
                (ins Int64Regs:$src),
                !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[hasGenericLdSt]>;

// @TODO: Are these actually needed?  I believe global addresses will be copied
// to register values anyway.
  /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
      !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
      [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasGenericLdSt]>;
  def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
      !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
      [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasGenericLdSt]>;*/

  def _no
    : NVPTXInst<(outs Int32Regs:$result),
                (ins Int32Regs:$src),
                "mov.u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _no_64
    : NVPTXInst<(outs Int64Regs:$result),
                (ins Int64Regs:$src),
                "mov.u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

// @TODO: Are these actually needed?  I believe global addresses will be copied
// to register values anyway.
  /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
      "mov.u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
  def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
      "mov.u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
}

// Address conversion, generic -> non-generic, for intrinsic Intrin.
// Mirrors NG_TO_G but emits "cvta.to.<Str>" in the _yes forms.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result),
                (ins Int32Regs:$src),
                !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[hasGenericLdSt]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result),
                (ins Int64Regs:$src),
                !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[hasGenericLdSt]>;
  def _no
    : NVPTXInst<(outs Int32Regs:$result),
                (ins Int32Regs:$src),
                "mov.u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _no_64
    : NVPTXInst<(outs Int64Regs:$result),
                (ins Int64Regs:$src),
                "mov.u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param: emitted as a plain register move (32- and 64-bit).
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result),
              (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result),
              (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics: one register-to-register mov per value type.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r),
              (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r),
              (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
1681/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s), 1682 "mov.u32 \t$r, $s;", 1683 [(set Int32Regs:$r, 1684 (int_nvvm_move_ptr texternalsym:$s))]>; 1685def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), 1686 "mov.u64 \t$r, $s;", 1687 [(set Int64Regs:$r, 1688 (int_nvvm_move_ptr texternalsym:$s))]>;*/ 1689 1690 1691// MoveParam %r1, param 1692// ptr_local_to_gen %r2, %r1 1693// ptr_gen_to_local %r3, %r2 1694// -> 1695// mov %r1, param 1696 1697// @TODO: Revisit this. There is a type 1698// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym 1699// instructions are not currently defined. However, we can use the ptr 1700// variants and the asm printer will do the right thing. 1701def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 1702 (MoveParam texternalsym:$src)))), 1703 (nvvm_move_ptr64 texternalsym:$src)>; 1704def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 1705 (MoveParam texternalsym:$src)))), 1706 (nvvm_move_ptr32 texternalsym:$src)>; 1707 1708def texsurf_handles 1709 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), 1710 "mov.u64 \t$result, $src;", []>; 1711 1712//----------------------------------- 1713// Compiler Error Warn 1714// - Just ignore them in codegen 1715//----------------------------------- 1716 1717def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 1718 "// llvm.nvvm.compiler.warn()", 1719 [(int_nvvm_compiler_warn Int32Regs:$a)]>; 1720def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 1721 "// llvm.nvvm.compiler.warn()", 1722 [(int_nvvm_compiler_warn Int64Regs:$a)]>; 1723def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 1724 "// llvm.nvvm.compiler.error()", 1725 [(int_nvvm_compiler_error Int32Regs:$a)]>; 1726def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 1727 "// llvm.nvvm.compiler.error()", 1728 [(int_nvvm_compiler_error Int64Regs:$a)]>; 1729 1730 1731// 
isspacep 1732 1733def ISSPACEP_CONST_32 1734 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 1735 "isspacep.const \t$d, $a;", 1736 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>, 1737 Requires<[hasPTX31]>; 1738def ISSPACEP_CONST_64 1739 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 1740 "isspacep.const \t$d, $a;", 1741 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>, 1742 Requires<[hasPTX31]>; 1743def ISSPACEP_GLOBAL_32 1744 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 1745 "isspacep.global \t$d, $a;", 1746 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 1747def ISSPACEP_GLOBAL_64 1748 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 1749 "isspacep.global \t$d, $a;", 1750 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 1751def ISSPACEP_LOCAL_32 1752 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 1753 "isspacep.local \t$d, $a;", 1754 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 1755def ISSPACEP_LOCAL_64 1756 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 1757 "isspacep.local \t$d, $a;", 1758 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 1759def ISSPACEP_SHARED_32 1760 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 1761 "isspacep.shared \t$d, $a;", 1762 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 1763def ISSPACEP_SHARED_64 1764 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 1765 "isspacep.shared \t$d, $a;", 1766 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 1767 1768 1769// Special register reads 1770def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 1771 (ins SpecialRegs:$r), 1772 "mov.b32\t$d, $r;", []>; 1773 1774def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 1775def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 1776def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 1777def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 1778def : 
Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 1779def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 1780def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 1781def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 1782def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 1783def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 1784def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; 1785def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 1786def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 1787def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 1788def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 1789def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 1790def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 1791def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 1792def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 1793def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 1794def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 1795def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 1796def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 1797def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 1798def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 1799def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 1800def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 1801def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 1802def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 1803def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 1804def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 1805def : 
Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 1806 1807 1808// rotate builtin support 1809 1810def ROTATE_B32_HW_IMM 1811 : NVPTXInst<(outs Int32Regs:$dst), 1812 (ins Int32Regs:$src, i32imm:$amt), 1813 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 1814 [(set Int32Regs:$dst, 1815 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 1816 Requires<[hasHWROT32]> ; 1817 1818def ROTATE_B32_HW_REG 1819 : NVPTXInst<(outs Int32Regs:$dst), 1820 (ins Int32Regs:$src, Int32Regs:$amt), 1821 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 1822 [(set Int32Regs:$dst, 1823 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 1824 Requires<[hasHWROT32]> ; 1825 1826def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 1827 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 1828 Requires<[noHWROT32]> ; 1829 1830def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 1831 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 1832 Requires<[noHWROT32]> ; 1833 1834def GET_LO_INT64 1835 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 1836 !strconcat("{{\n\t", 1837 !strconcat(".reg .b32 %dummy;\n\t", 1838 !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t", 1839 !strconcat("}}", "")))), 1840 []> ; 1841 1842def GET_HI_INT64 1843 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 1844 !strconcat("{{\n\t", 1845 !strconcat(".reg .b32 %dummy;\n\t", 1846 !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t", 1847 !strconcat("}}", "")))), 1848 []> ; 1849 1850def PACK_TWO_INT32 1851 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 1852 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 1853 1854def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 1855 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 1856 (GET_LO_INT64 Int64Regs:$src))> ; 1857 1858// funnel shift, requires >= sm_32 1859def SHF_L_WRAP_B32_IMM 1860 : NVPTXInst<(outs Int32Regs:$dst), 1861 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 1862 "shf.l.wrap.b32 \t$dst, $lo, $hi, 
$amt;",[]>, 1863 Requires<[hasHWROT32]>; 1864 1865def SHF_L_WRAP_B32_REG 1866 : NVPTXInst<(outs Int32Regs:$dst), 1867 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 1868 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 1869 Requires<[hasHWROT32]>; 1870 1871def SHF_R_WRAP_B32_IMM 1872 : NVPTXInst<(outs Int32Regs:$dst), 1873 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 1874 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 1875 Requires<[hasHWROT32]>; 1876 1877def SHF_R_WRAP_B32_REG 1878 : NVPTXInst<(outs Int32Regs:$dst), 1879 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 1880 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 1881 Requires<[hasHWROT32]>; 1882 1883// HW version of rotate 64 1884def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 1885 (PACK_TWO_INT32 1886 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 1887 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 1888 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 1889 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 1890 Requires<[hasHWROT32]>; 1891 1892def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 1893 (PACK_TWO_INT32 1894 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 1895 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 1896 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 1897 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 1898 Requires<[hasHWROT32]>; 1899 1900 1901def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 1902 (PACK_TWO_INT32 1903 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 1904 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 1905 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 1906 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 1907 Requires<[hasHWROT32]>; 1908 1909def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 1910 (PACK_TWO_INT32 1911 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 1912 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), 1913 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 1914 (GET_LO_INT64 
Int64Regs:$src), Int32Regs:$amt))>, 1915 Requires<[hasHWROT32]>; 1916 1917// SW version of rotate 64 1918def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 1919 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 1920 Requires<[noHWROT32]>; 1921def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 1922 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 1923 Requires<[noHWROT32]>; 1924def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 1925 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, 1926 Requires<[noHWROT32]>; 1927def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 1928 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 1929 Requires<[noHWROT32]>; 1930 1931 1932//----------------------------------- 1933// Texture Intrinsics 1934//----------------------------------- 1935 1936// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be 1937// also defined in NVPTXReplaceImageHandles.cpp 1938 1939 1940// Texture fetch instructions using handles 1941def TEX_1D_F32_I32 1942 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 1943 Float32Regs:$b, Float32Regs:$a), 1944 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 1945 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 1946 []>; 1947def TEX_1D_F32_F32 1948 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 1949 Float32Regs:$b, Float32Regs:$a), 1950 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 1951 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 1952 []>; 1953def TEX_1D_F32_F32_LEVEL 1954 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 1955 Float32Regs:$b, Float32Regs:$a), 1956 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), 1957 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 1958 "[$t, $s, \\{$x\\}], $lod;", 1959 []>; 1960def TEX_1D_F32_F32_GRAD 1961 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 1962 Float32Regs:$b, Float32Regs:$a), 1963 (ins Int64Regs:$t, 
Int64Regs:$s, Float32Regs:$x, 1964 Float32Regs:$gradx, Float32Regs:$grady), 1965 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 1966 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 1967 []>; 1968def TEX_1D_I32_I32 1969 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 1970 Int32Regs:$b, Int32Regs:$a), 1971 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 1972 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 1973 []>; 1974def TEX_1D_I32_F32 1975 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 1976 Int32Regs:$b, Int32Regs:$a), 1977 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 1978 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 1979 []>; 1980def TEX_1D_I32_F32_LEVEL 1981 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 1982 Int32Regs:$b, Int32Regs:$a), 1983 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 1984 Float32Regs:$lod), 1985 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 1986 "[$t, $s, \\{$x\\}], $lod;", 1987 []>; 1988def TEX_1D_I32_F32_GRAD 1989 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 1990 Int32Regs:$b, Int32Regs:$a), 1991 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 1992 Float32Regs:$gradx, Float32Regs:$grady), 1993 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 1994 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 1995 []>; 1996 1997def TEX_1D_ARRAY_F32_I32 1998 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 1999 Float32Regs:$b, Float32Regs:$a), 2000 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2001 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " 2002 "[$t, $s, \\{$l, $x\\}];", 2003 []>; 2004def TEX_1D_ARRAY_F32_F32 2005 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2006 Float32Regs:$b, Float32Regs:$a), 2007 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2008 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2009 "[$t, $s, \\{$l, $x\\}];", 2010 []>; 2011def TEX_1D_ARRAY_F32_F32_LEVEL 2012 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2013 Float32Regs:$b, Float32Regs:$a), 
2014 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2015 Float32Regs:$lod), 2016 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2017 "[$t, $s, \\{$l, $x\\}], $lod;", 2018 []>; 2019def TEX_1D_ARRAY_F32_F32_GRAD 2020 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2021 Float32Regs:$b, Float32Regs:$a), 2022 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2023 Float32Regs:$gradx, Float32Regs:$grady), 2024 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2025 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2026 []>; 2027def TEX_1D_ARRAY_I32_I32 2028 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2029 Int32Regs:$b, Int32Regs:$a), 2030 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2031 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " 2032 "[$t, $s, \\{$l, $x\\}];", 2033 []>; 2034def TEX_1D_ARRAY_I32_F32 2035 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2036 Int32Regs:$b, Int32Regs:$a), 2037 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2038 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 2039 "[$t, $s, \\{$l, $x\\}];", 2040 []>; 2041def TEX_1D_ARRAY_I32_F32_LEVEL 2042 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2043 Int32Regs:$b, Int32Regs:$a), 2044 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2045 Float32Regs:$lod), 2046 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 2047 "[$t, $s, \\{$l, $x\\}], $lod;", 2048 []>; 2049def TEX_1D_ARRAY_I32_F32_GRAD 2050 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2051 Int32Regs:$b, Int32Regs:$a), 2052 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2053 Float32Regs:$gradx, Float32Regs:$grady), 2054 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 2055 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2056 []>; 2057 2058def TEX_2D_F32_I32 2059 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2060 Float32Regs:$b, Float32Regs:$a), 2061 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2062 
"tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " 2063 "[$t, $s, \\{$x, $y\\}];", 2064 []>; 2065def TEX_2D_F32_F32 2066 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2067 Float32Regs:$b, Float32Regs:$a), 2068 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2069 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2070 "[$t, $s, \\{$x, $y\\}];", 2071 []>; 2072def TEX_2D_F32_F32_LEVEL 2073 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2074 Float32Regs:$b, Float32Regs:$a), 2075 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2076 Float32Regs:$lod), 2077 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2078 "[$t, $s, \\{$x, $y\\}], $lod;", 2079 []>; 2080def TEX_2D_F32_F32_GRAD 2081 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2082 Float32Regs:$b, Float32Regs:$a), 2083 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2084 Float32Regs:$gradx0, Float32Regs:$gradx1, 2085 Float32Regs:$grady0, Float32Regs:$grady1), 2086 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2087 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2088 "\\{$grady0, $grady1\\};", 2089 []>; 2090def TEX_2D_I32_I32 2091 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2092 Int32Regs:$b, Int32Regs:$a), 2093 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2094 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " 2095 "[$t, $s, \\{$x, $y\\}];", 2096 []>; 2097def TEX_2D_I32_F32 2098 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2099 Int32Regs:$b, Int32Regs:$a), 2100 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2101 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 2102 "[$t, $s, \\{$x, $y\\}];", 2103 []>; 2104def TEX_2D_I32_F32_LEVEL 2105 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2106 Int32Regs:$b, Int32Regs:$a), 2107 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2108 Float32Regs:$lod), 2109 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 2110 "[$t, $s, \\{$x, $y\\}], $lod;", 2111 []>; 2112def TEX_2D_I32_F32_GRAD 
2113 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2114 Int32Regs:$b, Int32Regs:$a), 2115 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2116 Float32Regs:$gradx0, Float32Regs:$gradx1, 2117 Float32Regs:$grady0, Float32Regs:$grady1), 2118 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " 2119 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2120 "\\{$grady0, $grady1\\};", 2121 []>; 2122 2123def TEX_2D_ARRAY_F32_I32 2124 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2125 Float32Regs:$b, Float32Regs:$a), 2126 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2127 Int32Regs:$y), 2128 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " 2129 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2130 []>; 2131def TEX_2D_ARRAY_F32_F32 2132 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2133 Float32Regs:$b, Float32Regs:$a), 2134 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2135 Float32Regs:$y), 2136 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2137 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2138 []>; 2139def TEX_2D_ARRAY_F32_F32_LEVEL 2140 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2141 Float32Regs:$b, Float32Regs:$a), 2142 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2143 Float32Regs:$y, Float32Regs:$lod), 2144 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2145 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2146 []>; 2147def TEX_2D_ARRAY_F32_F32_GRAD 2148 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2149 Float32Regs:$b, Float32Regs:$a), 2150 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2151 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 2152 Float32Regs:$grady0, Float32Regs:$grady1), 2153 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " 2154 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2155 "\\{$grady0, $grady1\\};", 2156 []>; 2157def TEX_2D_ARRAY_I32_I32 2158 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2159 Int32Regs:$b, Int32Regs:$a), 2160 (ins Int64Regs:$t, Int64Regs:$s, 
// Rest of TEX_2D_ARRAY_I32_I32, whose operand list opens before this span.
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;

// Integer-result 2D array fetches with float coordinates: plain, explicit
// $lod, and explicit gradients.  The coordinate vector is {$l, $x, $y, $y}.
def TEX_2D_ARRAY_I32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;

def TEX_2D_ARRAY_I32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
    []>;

def TEX_2D_ARRAY_I32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// 3D texture fetches (`tex.3d`).  Three coordinates are supplied and the
// vector is written with four elements by repeating $z: {$x, $y, $z, $z}.
def TEX_3D_F32_I32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;

def TEX_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;

// Head of the next record; its input operands follow after this span.
def TEX_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
// Rest of TEX_3D_F32_F32_LEVEL, whose `def` and outputs precede this span.
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// 3D fetch with explicit gradients.  Each gradient has three operands and is
// written as a four-element vector by repeating the last one.
def TEX_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// Integer-result 3D fetches.
def TEX_3D_I32_I32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;

def TEX_3D_I32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;

def TEX_3D_I32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// Head of the next record; its operand list continues after this span.
def TEX_3D_I32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
// Rest of TEX_3D_I32_F32_GRAD, whose operand list opens before this span.
         Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;


// Surface load instructions
//
// Each record emits one `suld.b.<geometry>[.vN].bW.trap` instruction reading
// from [$s, {coords}] into one, two or four result registers.  The 8-bit and
// 16-bit element records both return their results in Int16Regs; the 32-bit
// records use Int32Regs.  No selection pattern is attached (`[]`).

// 1D surfaces: a single 32-bit coordinate $x.
def SULD_1D_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

// 1D array surfaces: $l precedes $x in the coordinate vector.
def SULD_1D_ARRAY_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x\\}];",
    []>;

// Head of the next record; its pattern list follows after this span.
def SULD_1D_ARRAY_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x\\}];",
// Closes SULD_1D_ARRAY_V4I32_TRAP, which opens before this span.
    []>;

// 2D surface loads: coordinates {$x, $y}.  One, two or four result
// registers; b8 and b16 results are both held in Int16Regs.
def SULD_2D_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

// Head of the next record; its operand list continues after this span.
def SULD_2D_ARRAY_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s,
// Rest of SULD_2D_ARRAY_I8_TRAP, whose operand list opens before this span.
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 2D array surface loads.  The coordinate vector is written with four
// elements by repeating $y: {$l, $x, $y, $y}.
def SULD_2D_ARRAY_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

// Head of the next record; its operand list continues after this span.
def SULD_2D_ARRAY_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Rest of SULD_2D_ARRAY_V4I32_TRAP, whose operand list opens before this span.
         Int32Regs:$y),
    "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3D surface loads.  The coordinate vector is written with four elements by
// repeating $z: {$x, $y, $z, $z}.
def SULD_3D_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$x, $y, $z, $z\\}];",
    []>;

// Head of the next record; its fields follow after this span.
def SULD_3D_V4I32_TRAP
  :
// Fields of SULD_3D_V4I32_TRAP, whose `def` line precedes this span.
    NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$x, $y, $z, $z\\}];",
    []>;


//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each TXQ_* record emits `txq.<attribute>.b32 $d, [$a]`: a 32-bit result $d
// for the 64-bit operand $a.  The records carry no pattern themselves; the
// `Pat` entries further down map each int_nvvm_txq_* intrinsic onto its record.
def TXQ_CHANNEL_ORDER : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.channel_order.b32 \t$d, [$a];",
    []>;

def TXQ_CHANNEL_DATA_TYPE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.channel_data_type.b32 \t$d, [$a];",
    []>;

def TXQ_WIDTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.width.b32 \t$d, [$a];",
    []>;

def TXQ_HEIGHT : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.height.b32 \t$d, [$a];",
    []>;

def TXQ_DEPTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.depth.b32 \t$d, [$a];",
    []>;

def TXQ_ARRAY_SIZE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.array_size.b32 \t$d, [$a];",
    []>;

def TXQ_NUM_SAMPLES : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.num_samples.b32 \t$d, [$a];",
    []>;

def TXQ_NUM_MIPMAP_LEVELS : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.num_mipmap_levels.b32 \t$d, [$a];",
    []>;

// Intrinsic -> instruction selection patterns for the texture queries.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
          (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
          (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
          (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
          (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
          (TXQ_ARRAY_SIZE Int64Regs:$a)>;
// Head of the next pattern; its operands follow after this span.
def : Pat<(int_nvvm_txq_num_samples
// Rest of the int_nvvm_txq_num_samples pattern opened before this span.
          Int64Regs:$a),
          (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
          (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// Each SUQ_* record emits `suq.<attribute>.b32 $d, [$a]`: a 32-bit result $d
// for the 64-bit operand $a.  Selection is done by the `Pat` entries below.
def SUQ_CHANNEL_ORDER : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.channel_order.b32 \t$d, [$a];",
    []>;

def SUQ_CHANNEL_DATA_TYPE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.channel_data_type.b32 \t$d, [$a];",
    []>;

def SUQ_WIDTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.width.b32 \t$d, [$a];",
    []>;

def SUQ_HEIGHT : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.height.b32 \t$d, [$a];",
    []>;

def SUQ_DEPTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.depth.b32 \t$d, [$a];",
    []>;

def SUQ_ARRAY_SIZE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.array_size.b32 \t$d, [$a];",
    []>;

// Intrinsic -> instruction selection patterns for the surface queries.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
          (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
          (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
          (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
          (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
          (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
          (SUQ_ARRAY_SIZE Int64Regs:$a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
//
// Unlike the query records above, the istypep records carry their selection
// pattern inline and produce a 1-bit (Int1Regs) result.
def ISTYPEP_SAMPLER : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.samplerref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;

// Head of the next record; its fields follow after this span.
def ISTYPEP_SURFACE
  :
// Fields of ISTYPEP_SURFACE, whose `def` line precedes this span.
    NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.surfref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;

def ISTYPEP_TEXTURE : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.texref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;

//===- Surface Stores -----------------------------------------------------===//

// Unformatted
//
// `sust.b` records have no outputs.  Operands are $s, the coordinates, and
// then one, two or four data registers; b8 and b16 data are both passed in
// Int16Regs, b32 data in Int32Regs.  No selection pattern is attached.

def SUST_B_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
         Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;

// Head of the next record; its pattern list follows after this span.
def SUST_B_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
         Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// Closes SUST_B_1D_V4B16_TRAP, which opens before this span.
    []>;

def SUST_B_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
         Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;


// Unformatted stores to 1D array surfaces: $idx precedes $x in the
// coordinate vector.
def SUST_B_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
    "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
         Int16Regs:$g),
    "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
         Int16Regs:$g),
    "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
         Int32Regs:$g),
    "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
         Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
         Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
         Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;


// Unformatted stores to 2D surfaces: coordinates {$x, $y}.
def SUST_B_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
         Int16Regs:$g),
    "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
         Int16Regs:$g),
    "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
         Int32Regs:$g),
    "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
         Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

// Head of the next record; its asm string follows after this span.
def SUST_B_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
         Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
"sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 2780 "\\{$r, $g, $b, $a\\};", 2781 []>; 2782def SUST_B_2D_V4B32_TRAP 2783 : NVPTXInst<(outs), 2784 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 2785 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 2786 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 2787 "\\{$r, $g, $b, $a\\};", 2788 []>; 2789 2790 2791def SUST_B_2D_ARRAY_B8_TRAP 2792 : NVPTXInst<(outs), 2793 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2794 Int16Regs:$r), 2795 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 2796 []>; 2797def SUST_B_2D_ARRAY_B16_TRAP 2798 : NVPTXInst<(outs), 2799 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2800 Int16Regs:$r), 2801 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 2802 []>; 2803def SUST_B_2D_ARRAY_B32_TRAP 2804 : NVPTXInst<(outs), 2805 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2806 Int32Regs:$r), 2807 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 2808 []>; 2809def SUST_B_2D_ARRAY_V2B8_TRAP 2810 : NVPTXInst<(outs), 2811 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2812 Int16Regs:$r, Int16Regs:$g), 2813 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 2814 "\\{$r, $g\\};", 2815 []>; 2816def SUST_B_2D_ARRAY_V2B16_TRAP 2817 : NVPTXInst<(outs), 2818 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2819 Int16Regs:$r, Int16Regs:$g), 2820 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 2821 "\\{$r, $g\\};", 2822 []>; 2823def SUST_B_2D_ARRAY_V2B32_TRAP 2824 : NVPTXInst<(outs), 2825 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2826 Int32Regs:$r, Int32Regs:$g), 2827 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 2828 "\\{$r, $g\\};", 2829 []>; 2830def SUST_B_2D_ARRAY_V4B8_TRAP 2831 : NVPTXInst<(outs), 2832 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2833 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 2834 
"sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 2835 "\\{$r, $g, $b, $a\\};", 2836 []>; 2837def SUST_B_2D_ARRAY_V4B16_TRAP 2838 : NVPTXInst<(outs), 2839 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2840 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 2841 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 2842 "\\{$r, $g, $b, $a\\};", 2843 []>; 2844def SUST_B_2D_ARRAY_V4B32_TRAP 2845 : NVPTXInst<(outs), 2846 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 2847 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 2848 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 2849 "\\{$r, $g, $b, $a\\};", 2850 []>; 2851 2852 2853def SUST_B_3D_B8_TRAP 2854 : NVPTXInst<(outs), 2855 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2856 Int16Regs:$r), 2857 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 2858 []>; 2859def SUST_B_3D_B16_TRAP 2860 : NVPTXInst<(outs), 2861 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2862 Int16Regs:$r), 2863 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 2864 []>; 2865def SUST_B_3D_B32_TRAP 2866 : NVPTXInst<(outs), 2867 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2868 Int32Regs:$r), 2869 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 2870 []>; 2871def SUST_B_3D_V2B8_TRAP 2872 : NVPTXInst<(outs), 2873 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2874 Int16Regs:$r, Int16Regs:$g), 2875 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 2876 "\\{$r, $g\\};", 2877 []>; 2878def SUST_B_3D_V2B16_TRAP 2879 : NVPTXInst<(outs), 2880 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2881 Int16Regs:$r, Int16Regs:$g), 2882 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 2883 "\\{$r, $g\\};", 2884 []>; 2885def SUST_B_3D_V2B32_TRAP 2886 : NVPTXInst<(outs), 2887 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2888 Int32Regs:$r, Int32Regs:$g), 2889 
"sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 2890 "\\{$r, $g\\};", 2891 []>; 2892def SUST_B_3D_V4B8_TRAP 2893 : NVPTXInst<(outs), 2894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2895 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 2896 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 2897 "\\{$r, $g, $b, $a\\};", 2898 []>; 2899def SUST_B_3D_V4B16_TRAP 2900 : NVPTXInst<(outs), 2901 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2902 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 2903 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 2904 "\\{$r, $g, $b, $a\\};", 2905 []>; 2906def SUST_B_3D_V4B32_TRAP 2907 : NVPTXInst<(outs), 2908 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 2909 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 2910 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 2911 "\\{$r, $g, $b, $a\\};", 2912 []>; 2913 2914// Formatted 2915 2916def SUST_P_1D_B8_TRAP 2917 : NVPTXInst<(outs), 2918 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 2919 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 2920 []>; 2921def SUST_P_1D_B16_TRAP 2922 : NVPTXInst<(outs), 2923 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 2924 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 2925 []>; 2926def SUST_P_1D_B32_TRAP 2927 : NVPTXInst<(outs), 2928 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 2929 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 2930 []>; 2931def SUST_P_1D_V2B8_TRAP 2932 : NVPTXInst<(outs), 2933 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 2934 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 2935 []>; 2936def SUST_P_1D_V2B16_TRAP 2937 : NVPTXInst<(outs), 2938 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 2939 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 2940 []>; 2941def SUST_P_1D_V2B32_TRAP 2942 : NVPTXInst<(outs), 2943 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 2944 "sust.p.1d.v2.b32.trap 
\t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;

// The SUST_P_*_TRAP records below only describe operands and assembly text.
// Each one carries an empty pattern list; selection is wired up by the
// anonymous `def : Pat` records for the int_nvvm_sust_p_* intrinsics.
// 8- and 16-bit element variants take their values in Int16Regs, 32-bit
// variants in Int32Regs.
// NOTE(review): the PTX ISA appears to define sust.p only for .b32 elements;
// confirm that the .b8/.b16 spellings emitted here are accepted by ptxas.

// sust.p.1d, four-element vector values.
def SUST_P_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;


// sust.p.a1d: the coordinate vector is {$idx, $x}.
def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;


// sust.p.2d: the coordinate vector is {$x, $y}.
def SUST_P_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;


// sust.p.a2d: the coordinate vector is printed with four elements,
// {$idx, $x, $y, $y}; the trailing element repeats $y.
// NOTE(review): presumably the fourth slot is padding that the hardware
// ignores -- confirm against the PTX ISA before changing it.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;


// sust.p.3d: the coordinate vector is printed with four elements,
// {$x, $y, $z, $z}; the trailing element repeats $z.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;


// Surface store instruction patterns.
// The patterns are kept as standalone `def : Pat` records rather than being
// embedded in the instruction definitions: inlining them makes TableGen
// report type errors (root cause not yet understood).
// Each pattern forwards the intrinsic's operands, in order, to the
// identically shaped SUST_* instruction.

// sust.b, 1D.
def : Pat<(int_nvvm_sust_b_1d_i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.b, 1D array ($l is the array-index operand).
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.b, 2D.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.b, 2D array ($l is the array-index operand).
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.b, 3D.
def : Pat<(int_nvvm_sust_b_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;




// sust.p, 1D.
def : Pat<(int_nvvm_sust_p_1d_i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_P_1D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_P_1D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_P_1D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.p, 1D array ($l is the array-index operand).
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.p, 2D.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.p, 2D array ($l is the array-index operand).
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// sust.p, 3D.
def : Pat<(int_nvvm_sust_p_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_P_3D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



//===-- Old PTX Back-end Intrinsics ---------------------------------------===//

// The int_ptx_* intrinsics below are still selected so that code written for
// the old PTX backend keeps working.

// PTX Special Purpose Register Accessor Intrinsics

// Reads special register %<regname> into a 64-bit register with mov.u64 and
// matches the argument-less intrinsic `intop`.
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d),
              (ins),
              !strconcat("mov.u64\t$d, %", !strconcat(regname, ";")),
              [(set Int64Regs:$d, (intop))]>;

// Same as above for a 32-bit destination (mov.u32).
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d),
              (ins),
              !strconcat("mov.u32\t$d, %", !strconcat(regname, ";")),
              [(set Int32Regs:$d, (intop))]>;

// TODO: add vector versions of the special-register reads.

// %tid.{x,y,z,w}
def PTX_READ_TID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
def PTX_READ_TID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
def PTX_READ_TID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
def PTX_READ_TID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;

// %ntid.{x,y,z,w}
def PTX_READ_NTID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
def PTX_READ_NTID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;

// %laneid, %warpid, %nwarpid
def PTX_READ_LANEID
  : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
def PTX_READ_WARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
def PTX_READ_NWARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;

// %ctaid.{x,y,z,w}
def PTX_READ_CTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;

// %nctaid.{x,y,z,w}
def PTX_READ_NCTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;

// %smid, %nsmid, %gridid
def PTX_READ_SMID
  : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
def PTX_READ_NSMID
  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
def PTX_READ_GRIDID
  : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def PTX_READ_LANEMASK_EQ
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;

// %clock is read into a 32-bit register, %clock64 into a 64-bit register.
def PTX_READ_CLOCK
  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;

// %pm0 .. %pm3
def PTX_READ_PM0
  : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1
  : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2
  : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3
  : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;

// PTX Parallel Synchronization and Communication Intrinsics

// bar.sync with the barrier number given as an immediate operand.
def PTX_BAR_SYNC
  : NVPTXInst<(outs),
              (ins i32imm:$i),
              "bar.sync\t$i;",
              [(int_ptx_bar_sync imm:$i)]>;
