R600Instructions.td revision a76a0f74225802f4d3f11028ab54afe98b26302b
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"

// Base class for instructions carrying a 32-bit opcode word in Inst.
// The Trig, Op3 and isVector bits are exported through TSFlags{4..6}.
class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  field bits<32> Inst;
  bit Trig = 0;
  bit Op3 = 0;
  bit isVector = 0;

  let Inst = inst;
  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = itin;

  let TSFlags{4} = Trig;
  let TSFlags{5} = Op3;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6} = isVector;
}

// Base class for instructions whose encoding is described field by field in a
// 64-bit Inst word (see EG_CF_RAT below).
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    AMDGPUInst <outs, ins, asm, pattern>
{
  field bits<64> Inst;

  let Namespace = "AMDGPU";
}

// Memory operand: pointer register (X channel) plus immediate index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
}

// Memory operand: pointer register plus index register.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;

class R600_ALU {

  bits<7> DST_GPR = 0;
  bits<9> SRC0_SEL = 0;
  bits<1> SRC0_NEG = 0;
  bits<9> SRC1_SEL = 0;
  bits<1> SRC1_NEG = 0;
  bits<1> CLAMP = 0;

}


// One-source ALU instruction.  opName is the bare mnemonic; the operand list
// " $dst, $src" is appended here.
class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src, variable_ops),
          !strconcat(opName, " $dst, $src"),
          pattern,
          itin
  >;

// Two-source ALU instruction.  opName is the bare mnemonic; the operand list
// " $dst, $src0, $src1" is appended here.
class R600_2OP <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1, variable_ops),
          !strconcat(opName, " $dst, $src0, $src1"),
          pattern,
          itin
  >;

// Three-source ALU instruction; sets the Op3 flag.
class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops),
          !strconcat(opName, " $dst, $src0, $src1, $src2"),
          pattern,
          itin>{

    let Op3 = 1;
  }

// Instruction producing a single 32-bit result from caller-supplied inputs;
// defaults to the VecALU itinerary.
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          ins,
          asm,
          pattern,
          itin

  >;

// Texture instruction: 128-bit result, 128-bit source and two immediates.
// The separating space before $dst is required so the mnemonic and the first
// operand are not printed fused together.
class R600_TEX <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg128:$dst),
          (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
          !strconcat(opName, " $dst, $src0, $src1, $src2"),
          pattern,
          itin
  >;

// Immediate leaf that matches the texture-type values 6, 7, 8, 11 and 12.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
  }]
>;

// 64-bit CF RAT instruction encoding.  cf_inst, rat_inst and rat_id are fixed
// per instruction; the remaining fields are assigned by the deriving def.
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern>
{
  bits<7>  RW_GPR;
  bits<7>  INDEX_GPR;

  bits<2>  RIM;
  bits<2>  TYPE;
  bits<1>  RW_REL;
  bits<2>  ELEM_SIZE;

  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  EOP;
  bits<1>  MARK;
  bits<1>  BARRIER;

  /* CF_ALLOC_EXPORT_WORD0_RAT */
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; /* Reserved */
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  /* CF_ALLOC_EXPORT_WORD1_BUF */
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = EOP;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}

/* NOTE: disabled draft.  It does not compile as written: it declares `Src` and
 * `Type` but uses `src` and `PT`.
def store_global : PatFrag<(ops node:$value, node:$ptr),
                           (store node:$value, node:$ptr),
                           [{
                            const Value *Src;
                            const PointerType *Type;
                            if ((src = cast<StoreSDNode>(N)->getSrcValue() &&
                                 PT = dyn_cast<PointerType>(Src->getType()))) {
                              return PT->getAddressSpace() == 1;
                            }
                            return false;
                           }]>;

*/

// Matches a load whose source value is a pointer in the
// AMDGPUAS::PARAM_I_ADDRESS address space.  Loads without a source value, or
// with a non-pointer source type, do not match.
def load_param : PatFrag<(ops node:$ptr),
                         (load node:$ptr),
                          [{
                           const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
                           if (Src) {
                             PointerType * PT = dyn_cast<PointerType>(Src->getType());
                             return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
                           }
                           return false;
                          }]>;

//class EG_CF <bits<32> inst, string asm> :
//    InstR600 <inst, (outs), (ins), asm, []>;

/* XXX: We will use this when we emit the real ISA.
  bits<24> ADDR = 0;
  bits<3> JTS = 0;

  bits<3> PC = 0;
  bits<5> CF_CONST = 0;
  bits<2> COND = 0;
  bits<6> COUNT = 0;
  bits<1> VPM = 0;
  bits<1> EOP = 0;
  bits<8> CF_INST = 0;
  bits<1> WQM = 0;
  bits<1> B = 0;

  let Inst{23-0} = ADDR;
  let Inst{26-24} = JTS;
  let Inst{34-32} = PC;
  let Inst{39-35} = CF_CONST;
  let Inst{41-40} = COND;
  let Inst{47-42} = COUNT;
  let Inst{52} = VPM;
  let Inst{53} = EOP;
  let Inst{61-54} = CF_INST;
  let Inst{62} = WQM;
  let Inst{63} = B;
//}
*/

// Subtarget predicates.  Adjacent string literals are concatenated into a
// single C++ condition.
def isR600 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
def isR700 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
                            "Subtarget.device()->getDeviceFlag()"
                            ">= OCL_DEVICE_RV710">;
def isEG : Predicate<"Subtarget.device()"
                            "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
                            "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
def isCayman : Predicate<"Subtarget.device()"
                            "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
def isEGorCayman : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
                            "|| Subtarget.device()->getGeneration() =="
                            "AMDGPUDeviceInfo::HD6XXX">;

def isR600toCayman : Predicate<
                     "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;


let Predicates = [isR600toCayman] in {

/* ------------------------------------------- */
/* Common Instructions R600, R700, Evergreen, Cayman */
/* ------------------------------------------- */
def ADD : R600_2OP <
  0x0, "ADD",
  [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))]
>;

// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP <
  0x1, "MUL NON-IEEE",
  [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MUL_IEEE : R600_2OP <
  0x2, "MUL_IEEE",
  [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MAX : R600_2OP <
  0x3, "MAX",
  [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MIN : R600_2OP <
  0x4, "MIN",
  [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))]
>;

/* For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
 * so some of the instruction names don't match the asm string.
 * XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
 */

def SETE : R600_2OP <
  0x08, "SETE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
             COND_EQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
              COND_GT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
              COND_GE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    COND_NE))]
>;

def FRACT : R600_1OP <
  0x10, "FRACT",
  [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))]
>;

def TRUNC : R600_1OP <
  0x11, "TRUNC",
  [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
>;

def CEIL : R600_1OP <
  0x12, "CEIL",
  [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))]
>;

def RNDNE : R600_1OP <
  0x13, "RNDNE",
  [(set R600_Reg32:$dst, (frint R600_Reg32:$src))]
>;

def FLOOR : R600_1OP <
  0x14, "FLOOR",
  [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
>;

def MOV : R600_1OP <0x19, "MOV", []>;

// MOV of an immediate.  Shares opcode 0x19 with MOV; the $alu_literal operand
// is filled with ALU_LITERAL_X by the selection patterns below.
class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$alu_literal, immType:$imm),
  "MOV_IMM $dst, $imm",
  [], AnyALU
>;

def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
  (imm:$val),
  (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val)
>;

def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
  (fpimm:$val),
  (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
>;

def KILLGT : R600_2OP <
  0x2D, "KILLGT",
  []
>;

def AND_INT : R600_2OP <
  0x30, "AND_INT",
  [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def OR_INT : R600_2OP <
  0x31, "OR_INT",
  [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def XOR_INT : R600_2OP <
  0x32, "XOR_INT",
  [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def NOT_INT : R600_1OP <
  0x33, "NOT_INT",
  [(set R600_Reg32:$dst, (not R600_Reg32:$src))]
>;

def ADD_INT : R600_2OP <
  0x34, "ADD_INT",
  [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def SUB_INT : R600_2OP <
  0x35, "SUB_INT",
  [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MAX_INT : R600_2OP <
  0x36, "MAX_INT",
  [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>;

def MIN_INT : R600_2OP <
  0x37, "MIN_INT",
  [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>;

// Unsigned max must select on the unsigned node; the signed node is already
// claimed by MAX_INT above.
def MAX_UINT : R600_2OP <
  0x38, "MAX_UINT",
  [(set R600_Reg32:$dst, (AMDGPUumax R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MIN_UINT : R600_2OP <
  0x39, "MIN_UINT",
  [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def SETE_INT : R600_2OP <
  0x3A, "SETE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
>;

def SETGT_INT : R600_2OP <
  0x3B, "SETGT_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
>;

def SETGE_INT : R600_2OP <
  0x3C, "SETGE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
>;

def SETNE_INT : R600_2OP <
  0x3D, "SETNE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
>;

def SETGT_UINT : R600_2OP <
  0x3E, "SETGT_UINT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
>;

def SETGE_UINT : R600_2OP <
  0x3F, "SETGE_UINT",
  [(set (i32 R600_Reg32:$dst),
    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
>;

// Note the operand order: the select's true value maps to $src2 and its false
// value to $src1.
def CNDE_INT : R600_3OP <
  0x1C, "CNDE_INT",
  [(set (i32 R600_Reg32:$dst),
   (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
>;

/* Texture instructions */


// TEX_LD takes five immediates, so it replaces the operand list and asm
// string inherited from R600_TEX.
def TEX_LD : R600_TEX <
  0x03, "TEX_LD",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3,
                        imm:$src4, imm:$src5))]
> {
let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5";
let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2,
                         i32imm:$src3, i32imm:$src4, i32imm:$src5);
}

def TEX_GET_TEXTURE_RESINFO : R600_TEX <
  0x04, "TEX_GET_TEXTURE_RESINFO",
  [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_GET_GRADIENTS_H : R600_TEX <
  0x07, "TEX_GET_GRADIENTS_H",
  [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_GET_GRADIENTS_V : R600_TEX <
  0x08, "TEX_GET_GRADIENTS_V",
  [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SET_GRADIENTS_H : R600_TEX <
  0x0B, "TEX_SET_GRADIENTS_H",
  []
>;

def TEX_SET_GRADIENTS_V : R600_TEX <
  0x0C, "TEX_SET_GRADIENTS_V",
  []
>;

def TEX_SAMPLE : R600_TEX <
  0x10, "TEX_SAMPLE",
  [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SAMPLE_C : R600_TEX <
  0x18, "TEX_SAMPLE_C",
  [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
>;

def TEX_SAMPLE_L : R600_TEX <
  0x11, "TEX_SAMPLE_L",
  [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SAMPLE_C_L : R600_TEX <
  0x19, "TEX_SAMPLE_C_L",
  [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
>;

def TEX_SAMPLE_LB : R600_TEX <
  0x12, "TEX_SAMPLE_LB",
  [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SAMPLE_C_LB : R600_TEX <
  0x1A, "TEX_SAMPLE_C_LB",
  [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
>;

def TEX_SAMPLE_G : R600_TEX <
  0x14, "TEX_SAMPLE_G",
  []
>;

def TEX_SAMPLE_C_G : R600_TEX <
  0x1C, "TEX_SAMPLE_C_G",
  []
>;

/* Helper classes for common instructions */

// Each *_Common class fixes the mnemonic and selection pattern and leaves the
// opcode to the per-generation def.

class MUL_LIT_Common <bits<32> inst> : R600_3OP <
  inst, "MUL_LIT",
  []
>;

class MULADD_Common <bits<32> inst> : R600_3OP <
  inst, "MULADD",
  [(set (f32 R600_Reg32:$dst),
   (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
>;

class CNDE_Common <bits<32> inst> : R600_3OP <
  inst, "CNDE",
  [(set (f32 R600_Reg32:$dst),
   (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))),
           (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
>;

class CNDGT_Common <bits<32> inst> : R600_3OP <
  inst, "CNDGT",
  []
>;

// Selected from the cndlt intrinsic with $src1 and $src2 exchanged.
class CNDGE_Common <bits<32> inst> : R600_3OP <
  inst, "CNDGE",
  [(set R600_Reg32:$dst,
        (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
>;

class DOT4_Common <bits<32> inst> : R600_REDUCTION <
  inst,
  (ins R600_Reg128:$src0, R600_Reg128:$src1),
  "DOT4 $dst, $src0, $src1",
  [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
>;

class CUBE_Common <bits<32> inst> : InstR600 <
  inst,
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src),
  "CUBE $dst, $src",
  [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
  VecALU
>;

class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "EXP_IEEE",
  [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
>;

class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
  inst, "FLT_TO_INT",
  [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
>;

class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
  inst, "INT_TO_FLT",
  [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
>;

class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP <
  inst, "FLT_TO_UINT",
  [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
>;

class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP <
  inst, "UINT_TO_FLT",
  [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
>;

class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP <
  inst, "LOG_CLAMPED",
  []
>;

class LOG_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "LOG_IEEE",
  [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))]
>;

// R600_2OP appends the operand list itself, so only the bare mnemonic is
// passed below; passing a full asm string would print the operands twice.

class LSHL_Common <bits<32> inst> : R600_2OP <
  inst, "LSHL",
  [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class LSHR_Common <bits<32> inst> : R600_2OP <
  inst, "LSHR",
  [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class ASHR_Common <bits<32> inst> : R600_2OP <
  inst, "ASHR",
  [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULHI_INT_Common <bits<32> inst> : R600_2OP <
  inst, "MULHI_INT",
  [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))]
>;

// R600_1OP / R600_2OP append the operand list to the mnemonic, so the classes
// below pass only the bare mnemonic; a full asm string here would print the
// operands twice.

class MULHI_UINT_Common <bits<32> inst> : R600_2OP <
  inst, "MULHI_UINT",
  [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULLO_INT_Common <bits<32> inst> : R600_2OP <
  inst, "MULLO_INT",
  [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULLO_UINT_Common <bits<32> inst> : R600_2OP <
  inst, "MULLO_UINT",
  []
>;

class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP <
  inst, "RECIP_CLAMPED",
  []
>;

class RECIP_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "RECIP_IEEE",
  [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
>;

class RECIP_UINT_Common <bits<32> inst> : R600_1OP <
  inst, "RECIP_UINT",
  [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
>;

class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP <
  inst, "RECIPSQRT_CLAMPED",
  [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
>;

class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "RECIPSQRT_IEEE",
  []
>;

// SIN and COS carry no selection pattern of their own and set the Trig flag.
class SIN_Common <bits<32> inst> : R600_1OP <
  inst, "SIN", []>{
  let Trig = 1;
}

class COS_Common <bits<32> inst> : R600_1OP <
  inst, "COS", []> {
  let Trig = 1;
}

/* Helper patterns for complex intrinsics */
/* -------------------------------------- */

// div(a, b) is lowered to MUL(a, recip_ieee(b)).
class DIV_Common <InstR600 recip_ieee> : Pat<
  (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
  (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
>;

// ssg(x) is lowered to cndgt(x, ONE, cndge(x, ZERO, NEG_ONE)).
class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat <
  (int_AMDGPU_ssg R600_Reg32:$src),
  (cndgt R600_Reg32:$src, (f32 ONE),
         (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE)))
>;

// lit_z(x, y, w) is lowered to
// exp_ieee(mul_lit(log_clamped(MAX(y, ZERO)), w, x)).
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
  (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))),
                     R600_Reg32:$src_w, R600_Reg32:$src_x))
>;

/* ---------------------- */
/* R600 / R700 Only Instructions */
/* ---------------------- */

let Predicates = [isR600] in {

  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  def MULADD_r600 : MULADD_Common<0x10>;
  def CNDE_r600 : CNDE_Common<0x18>;
  def CNDGT_r600 : CNDGT_Common<0x19>;
  def CNDGE_r600 : CNDGE_Common<0x1A>;
  def DOT4_r600 : DOT4_Common<0x50>;
  def CUBE_r600 : CUBE_Common<0x52>;
  def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600 : SIN_Common<0x6E>;
  def COS_r600 : COS_Common<0x6F>;
  def ASHR_r600 : ASHR_Common<0x70>;
  def LSHR_r600 : LSHR_Common<0x71>;
  def LSHL_r600 : LSHL_Common<0x72>;
  def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;

  def DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>;
  def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
  def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

}

// Helper pattern for normalizing inputs to trigonometric instructions for R700+
// cards.
744class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat< 745 (intr R600_Reg32:$src), 746 (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) 747>; 748 749//===----------------------------------------------------------------------===// 750// R700 Only instructions 751//===----------------------------------------------------------------------===// 752 753let Predicates = [isR700] in { 754 def SIN_r700 : SIN_Common<0x6E>; 755 def COS_r700 : COS_Common<0x6F>; 756 757 // R700 normalizes inputs to SIN/COS the same as EG 758 def : TRIG_eg <SIN_r700, int_AMDGPU_sin>; 759 def : TRIG_eg <COS_r700, int_AMDGPU_cos>; 760} 761 762//===----------------------------------------------------------------------===// 763// Evergreen Only instructions 764//===----------------------------------------------------------------------===// 765 766let Predicates = [isEG] in { 767 768def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; 769 770def MULLO_INT_eg : MULLO_INT_Common<0x8F>; 771def MULHI_INT_eg : MULHI_INT_Common<0x90>; 772def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; 773def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; 774def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; 775 776} // End Predicates = [isEG] 777 778/* ------------------------------- */ 779/* Evergreen / Cayman Instructions */ 780/* ------------------------------- */ 781 782let Predicates = [isEGorCayman] in { 783 784 // BFE_UINT - bit_extract, an optimization for mask and shift 785 // Src0 = Input 786 // Src1 = Offset 787 // Src2 = Width 788 // 789 // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) 790 // 791 // Example Usage: 792 // (Offset, Width) 793 // 794 // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 795 // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 796 // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 797 // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 798 def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", 799 [(set R600_Reg32:$dst, 
(int_AMDIL_bit_extract_u32 R600_Reg32:$src0, 800 R600_Reg32:$src1, 801 R600_Reg32:$src2))], 802 VecALU 803 >; 804 805 def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", 806 [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, 807 R600_Reg32:$src2))], 808 VecALU 809 >; 810 811 def MULADD_eg : MULADD_Common<0x14>; 812 def ASHR_eg : ASHR_Common<0x15>; 813 def LSHR_eg : LSHR_Common<0x16>; 814 def LSHL_eg : LSHL_Common<0x17>; 815 def CNDE_eg : CNDE_Common<0x19>; 816 def CNDGT_eg : CNDGT_Common<0x1A>; 817 def CNDGE_eg : CNDGE_Common<0x1B>; 818 def MUL_LIT_eg : MUL_LIT_Common<0x1F>; 819 def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; 820 def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; 821 def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; 822 def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; 823 def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; 824 def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; 825 def SIN_eg : SIN_Common<0x8D>; 826 def COS_eg : COS_Common<0x8E>; 827 def DOT4_eg : DOT4_Common<0xBE>; 828 def CUBE_eg : CUBE_Common<0xC0>; 829 830 def DIV_eg : DIV_Common<RECIP_IEEE_eg>; 831 def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>; 832 def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>; 833 def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; 834 835 def : TRIG_eg <SIN_eg, int_AMDGPU_sin>; 836 def : TRIG_eg <COS_eg, int_AMDGPU_cos>; 837 838 def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { 839 let Pattern = []; 840 } 841 842 def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; 843 844 def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { 845 let Pattern = []; 846 } 847 848 def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; 849 850 def : Pat<(fp_to_sint R600_Reg32:$src), 851 (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; 852 853 def : Pat<(fp_to_uint R600_Reg32:$src), 854 (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; 855 856//===----------------------------------------------------------------------===// 857// Memory read/write instructions 
858//===----------------------------------------------------------------------===// 859 860let usesCustomInserter = 1 in { 861 862def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs), 863 (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), 864 "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr", 865 [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]> 866{ 867 let RIM = 0; 868 /* XXX: Have a separate instruction for non-indexed writes. */ 869 let TYPE = 1; 870 let RW_REL = 0; 871 let ELEM_SIZE = 0; 872 873 let ARRAY_SIZE = 0; 874 let COMP_MASK = 1; 875 let BURST_COUNT = 0; 876 let VPM = 0; 877 let EOP = 0; 878 let MARK = 0; 879 let BARRIER = 1; 880} 881 882} // End usesCustomInserter = 1 883 884// Floating point global_store 885def : Pat < 886 (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), 887 (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr) 888>; 889 890class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern> 891 : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> { 892 893 // Operands 894 bits<7> DST_GPR; 895 bits<7> SRC_GPR; 896 897 // Static fields 898 bits<5> VC_INST = 0; 899 bits<2> FETCH_TYPE = 2; 900 bits<1> FETCH_WHOLE_QUAD = 0; 901 bits<8> BUFFER_ID = buffer_id; 902 bits<1> SRC_REL = 0; 903 // XXX: We can infer this field based on the SRC_GPR. This would allow us 904 // to store vertex addresses in any channel, not just X. 905 bits<2> SRC_SEL_X = 0; 906 bits<6> MEGA_FETCH_COUNT; 907 bits<1> DST_REL = 0; 908 bits<3> DST_SEL_X; 909 bits<3> DST_SEL_Y; 910 bits<3> DST_SEL_Z; 911 bits<3> DST_SEL_W; 912 // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, 913 // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, 914 // however, based on my testing if USE_CONST_FIELDS is set, then all 915 // these fields need to be set to 0. 
916 bits<1> USE_CONST_FIELDS = 0; 917 bits<6> DATA_FORMAT; 918 bits<2> NUM_FORMAT_ALL = 1; 919 bits<1> FORMAT_COMP_ALL = 0; 920 bits<1> SRF_MODE_ALL = 0; 921 922 // LLVM can only encode 64-bit instructions, so these fields are manually 923 // encoded in R600CodeEmitter 924 // 925 // bits<16> OFFSET; 926 // bits<2> ENDIAN_SWAP = 0; 927 // bits<1> CONST_BUF_NO_STRIDE = 0; 928 // bits<1> MEGA_FETCH = 0; 929 // bits<1> ALT_CONST = 0; 930 // bits<2> BUFFER_INDEX_MODE = 0; 931 932 // VTX_WORD0 933 let Inst{4-0} = VC_INST; 934 let Inst{6-5} = FETCH_TYPE; 935 let Inst{7} = FETCH_WHOLE_QUAD; 936 let Inst{15-8} = BUFFER_ID; 937 let Inst{22-16} = SRC_GPR; 938 let Inst{23} = SRC_REL; 939 let Inst{25-24} = SRC_SEL_X; 940 let Inst{31-26} = MEGA_FETCH_COUNT; 941 942 // VTX_WORD1_GPR 943 let Inst{38-32} = DST_GPR; 944 let Inst{39} = DST_REL; 945 let Inst{40} = 0; // Reserved 946 let Inst{43-41} = DST_SEL_X; 947 let Inst{46-44} = DST_SEL_Y; 948 let Inst{49-47} = DST_SEL_Z; 949 let Inst{52-50} = DST_SEL_W; 950 let Inst{53} = USE_CONST_FIELDS; 951 let Inst{59-54} = DATA_FORMAT; 952 let Inst{61-60} = NUM_FORMAT_ALL; 953 let Inst{62} = FORMAT_COMP_ALL; 954 let Inst{63} = SRF_MODE_ALL; 955 956 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding 957 // is done in R600CodeEmitter 958 // 959 // Inst{79-64} = OFFSET; 960 // Inst{81-80} = ENDIAN_SWAP; 961 // Inst{82} = CONST_BUF_NO_STRIDE; 962 // Inst{83} = MEGA_FETCH; 963 // Inst{84} = ALT_CONST; 964 // Inst{86-85} = BUFFER_INDEX_MODE; 965 // Inst{95-86} = 0; Reserved 966 967 // VTX_WORD3 (Padding) 968 // 969 // Inst{127-96} = 0; 970} 971 972class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> 973 : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { 974 975 let MEGA_FETCH_COUNT = 4; 976 let DST_SEL_X = 0; 977 let DST_SEL_Y = 7; // Masked 978 let DST_SEL_Z = 7; // Masked 979 let DST_SEL_W = 7; // Masked 980 let DATA_FORMAT = 0xD; // COLOR_32 981 982 // This is not really necessary, but there were some 
GPU hangs that appeared 983 // to be caused by ALU instructions in the next instruction group that wrote 984 // to the $ptr registers of the VTX_READ. 985 // e.g. 986 // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24 987 // %T2_X<def> = MOV %ZERO 988 //Adding this constraint prevents this from happening. 989 let Constraints = "$ptr.ptr = $dst"; 990} 991 992class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> 993 : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> { 994 995 let MEGA_FETCH_COUNT = 16; 996 let DST_SEL_X = 0; 997 let DST_SEL_Y = 1; 998 let DST_SEL_Z = 2; 999 let DST_SEL_W = 3; 1000 let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 1001 1002 // XXX: Need to force VTX_READ_128 instructions to write to the same register 1003 // that holds its buffer address to avoid potential hangs. We can't use 1004 // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst 1005 // registers are different sizes. 1006} 1007 1008//===----------------------------------------------------------------------===// 1009// VTX Read from parameter memory space 1010//===----------------------------------------------------------------------===// 1011 1012class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0, 1013 [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] 1014>; 1015 1016def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>; 1017def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>; 1018 1019 1020//===----------------------------------------------------------------------===// 1021// VTX Read from global memory space 1022//===----------------------------------------------------------------------===// 1023 1024// 32-bit reads 1025 1026class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1, 1027 [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] 1028>; 1029 1030def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>; 1031def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>; 1032 1033// 128-bit reads 1034 1035class 
VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1, 1036 [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] 1037>; 1038 1039def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>; 1040def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>; 1041 1042} 1043 1044let Predicates = [isCayman] in { 1045 1046let isVector = 1 in { 1047 1048def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; 1049 1050def MULLO_INT_cm : MULLO_INT_Common<0x8F>; 1051def MULHI_INT_cm : MULHI_INT_Common<0x90>; 1052def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; 1053def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; 1054 1055} // End isVector = 1 1056 1057// RECIP_UINT emulation for Cayman 1058def : Pat < 1059 (AMDGPUurecip R600_Reg32:$src0), 1060 (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), 1061 (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000))) 1062>; 1063 1064} // End isCayman 1065 1066/* Other Instructions */ 1067 1068let isCodeGenOnly = 1 in { 1069/* 1070 def SWIZZLE : AMDGPUShaderInst < 1071 (outs GPRV4F32:$dst), 1072 (ins GPRV4F32:$src0, i32imm:$src1), 1073 "SWIZZLE $dst, $src0, $src1", 1074 [(set GPRV4F32:$dst, (int_AMDGPU_swizzle GPRV4F32:$src0, imm:$src1))] 1075 >; 1076*/ 1077 1078 def LAST : AMDGPUShaderInst < 1079 (outs), 1080 (ins), 1081 "LAST", 1082 [] 1083 >; 1084 1085 def GET_CHAN : AMDGPUShaderInst < 1086 (outs R600_Reg32:$dst), 1087 (ins R600_Reg128:$src0, i32imm:$src1), 1088 "GET_CHAN $dst, $src0, $src1", 1089 [] 1090 >; 1091 1092 def MULLIT : AMDGPUShaderInst < 1093 (outs R600_Reg128:$dst), 1094 (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), 1095 "MULLIT $dst, $src0, $src1", 1096 [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] 1097 >; 1098 1099let usesCustomInserter = 1, isPseudo = 1 in { 1100 1101class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst < 1102 (outs R600_TReg32:$dst), 1103 (ins), 1104 asm, 1105 [(set R600_TReg32:$dst, (intr))] 1106>; 1107 1108def 
TGID_X : R600PreloadInst <"TGID_X", int_r600_read_tgid_x>;
def TGID_Y : R600PreloadInst <"TGID_Y", int_r600_read_tgid_y>;
def TGID_Z : R600PreloadInst <"TGID_Z", int_r600_read_tgid_z>;

// One R600PreloadInst per component (X/Y/Z) of each int_r600_read_*
// intrinsic family: tgid (above), tidig, ngroups, global_size, local_size.
def TIDIG_X : R600PreloadInst <"TIDIG_X", int_r600_read_tidig_x>;
def TIDIG_Y : R600PreloadInst <"TIDIG_Y", int_r600_read_tidig_y>;
def TIDIG_Z : R600PreloadInst <"TIDIG_Z", int_r600_read_tidig_z>;

def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>;
def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>;
def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>;

def GLOBAL_SIZE_X : R600PreloadInst <"GLOBAL_SIZE_X",
                                     int_r600_read_global_size_x>;
def GLOBAL_SIZE_Y : R600PreloadInst <"GLOBAL_SIZE_Y",
                                     int_r600_read_global_size_y>;
def GLOBAL_SIZE_Z : R600PreloadInst <"GLOBAL_SIZE_Z",
                                     int_r600_read_global_size_z>;

def LOCAL_SIZE_X : R600PreloadInst <"LOCAL_SIZE_X",
                                    int_r600_read_local_size_x>;
def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y",
                                    int_r600_read_local_size_y>;
def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z",
                                    int_r600_read_local_size_z>;

// Selected from int_AMDGPU_load_const with an immediate operand; produces a
// 32-bit register.
def R600_LOAD_CONST : AMDGPUShaderInst <
  (outs R600_Reg32:$dst),
  (ins i32imm:$src0),
  "R600_LOAD_CONST $dst, $src0",
  [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
>;

// Selected from int_R600_load_input with an immediate operand; produces a
// 32-bit register.
def LOAD_INPUT : AMDGPUShaderInst <
  (outs R600_Reg32:$dst),
  (ins i32imm:$src),
  "LOAD_INPUT $dst, $src",
  [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))]
>;

// Selected from int_AMDGPU_reserve_reg; takes an immediate and has no result.
def RESERVE_REG : AMDGPUShaderInst <
  (outs),
  (ins i32imm:$src),
  "RESERVE_REG $src",
  [(int_AMDGPU_reserve_reg imm:$src)]
>;

// Selected from int_AMDGPU_store_output; takes a 32-bit register and an
// immediate and has no result.
def STORE_OUTPUT: AMDGPUShaderInst <
  (outs),
  (ins R600_Reg32:$src0, i32imm:$src1),
  "STORE_OUTPUT $src0, $src1",
  [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)]
>;

def TXD:
AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
  "TXD $dst, $src0, $src1, $src2, $src3, $src4",
  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))]
>;

// Also selected from int_AMDGPU_txd, but only when the last operand matches
// TEX_SHADOW instead of a plain immediate.
def TXD_SHADOW: AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
  "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4",
  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))]
>;

} // End usesCustomInserter = 1, isPseudo = 1

} // End isCodeGenOnly = 1

// Instantiations of the CLAMP / FABS / FNEG classes for R600_Reg32.
def CLAMP_R600 : CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;

let usesCustomInserter = 1 in {

// Takes one 32-bit register and has no result or selection pattern of its
// own; it is only produced by the KIL patterns below.
def MASK_WRITE : AMDGPUShaderInst <
  (outs),
  (ins R600_Reg32:$src),
  "MASK_WRITE $src",
  []
>;

} // End usesCustomInserter = 1

//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//

// KIL Patterns
// int_AMDGPU_kilp becomes MASK_WRITE of KILLGT(ONE, ZERO).
def KILP : Pat <
  (int_AMDGPU_kilp),
  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;

// int_AMDGPU_kill becomes MASK_WRITE of KILLGT(ZERO, $src0).
def KIL : Pat <
  (int_AMDGPU_kill R600_Reg32:$src0),
  (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
>;

// SGT Reverse args
// (src0 < src1) is selected as SGT with the operands swapped.
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
  (SGT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SGE Reverse args
// (src0 <= src1) is selected as SGE with the operands swapped.
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
  (SGE R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_INT reverse args
def : Pat <
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
  (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_INT reverse args
// (src0 <= src1) is selected as SETGE_INT with the operands swapped.
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
  (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_UINT reverse args
// (src0 <u src1) is selected as SETGT_UINT with the operands swapped.
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
  (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_UINT reverse args
// (src0 <=u src1) is equivalent to (src1 >=u src0), so the operands must be
// swapped exactly as in the three reverse-args patterns above.  Emitting
// SETGE_UINT $src0, $src1 would compute (src0 >=u src1) instead.
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
  (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// The next two patterns are special cases for handling 'true if ordered' and
// 'true if unordered' conditionals.  The assumption here is that the behavior of
// SETE and SNE conforms to the Direct3D 10 rules for floating point values
// described here:
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
// We assume that SETE returns false when one of the operands is NAN and
// SNE returns true when one of the operands is NAN

//SETE - 'true if ordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
  (SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;

//SNE - 'true if unordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
  (SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;

// Element extraction from v4f32 held in R600_Reg128: indices 0..3 map to the
// sel_x / sel_y / sel_z / sel_w subregister indices.
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;

// Element insertion into v4f32; these records use indices 4..7 for
// sel_x..sel_w.
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
def : Insert_Element <f32, v4f32,
R600_Reg32, R600_Reg128, 6, sel_z>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;

// Element extraction from v4i32 held in R600_Reg128: indices 0..3 map to the
// sel_x / sel_y / sel_z / sel_w subregister indices.
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;

// Element insertion into v4i32; these records use indices 4..7 for
// sel_x..sel_w.
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;

// Vector_Build records for both 4-element vector types, using R600_Reg32.
def : Vector_Build <v4f32, R600_Reg32>;
def : Vector_Build <v4i32, R600_Reg32>;

// bitconvert patterns

def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;

} // End isR600toCayman Predicate