R600Instructions.td revision a2b4eb6d15a13de257319ac6231b5ab622cd02b1
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"
include "R600InstrFormats.td"

// InstR600 specialised with the NullALU itinerary and placed in the AMDGPU
// namespace.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    InstR600 <outs, ins, asm, pattern, NullALU> {

  let Namespace = "AMDGPU";
}

// Memory operand made of a pointer register (X channel) plus an immediate
// index; printed by printMemOperand.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
  let PrintMethod = "printMemOperand";
}

// Memory operand made of a pointer register plus an index register.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Operands for non-registers

// An i32 operand that may be omitted from patterns: it takes the value
// `Default` when not given and is printed with the method named by `PM`.
class InstFlag<string PM = "printOperand", int Default = 0>
    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}

// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
let PrintMethod = "printSel" in
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))>;

// Bank swizzle selector; defaults to 0.
let PrintMethod = "printBankSwizzle" in
def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))>;

def LITERAL : InstFlag<"printLiteral">;

// Per-instruction modifier flags. All default to 0 except WRITE, which
// defaults to 1.
def WRITE : InstFlag <"printWrite", 1>;
def OMOD  : InstFlag <"printOMOD">;
def REL   : InstFlag <"printRel">;
def CLAMP : InstFlag <"printClamp">;
def NEG   : InstFlag <"printNeg">;
def ABS   : InstFlag <"printAbs">;
def UEM   : InstFlag <"printUpdateExecMask">;
def UP    : InstFlag <"printUpdatePred">;

// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST : InstFlag<"printLast", 1>;

// i32 operands that differ from a plain immediate only in how they are
// printed.
let PrintMethod = "printRSel" in
def RSel : Operand<i32>;

let PrintMethod = "printCT" in
def CT: Operand<i32>;

// Frame operand: pointer register plus immediate index.
def FRAMEri : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}

// Addressing-mode matchers. The string names the C++ selection routine; the
// integer is the number of operands that routine produces.
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;


// Predicate operand; PRED_SEL_OFF is used when no predicate is given.
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                 (ops PRED_SEL_OFF)>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
// ALU instruction with a single source register.
//   inst    - 11-bit opcode handed to R600ALU_Word1_OP2.
//   opName  - mnemonic placed at the front of the asm string.
//   pattern - selection pattern(s), may be empty.
//   itin    - scheduling itinerary, AnyALU unless overridden.
// The encoding reuses the two-source Word1 layout with every src1 field and
// both update_* fields forced to 0.
// NOTE(review): $bank_swizzle is listed after $literal in the ins list, while
// the comment preceding this class asks for $literal to stay last - confirm
// which of the two is current.
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                   "$clamp $last $dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  // There is no second source: zero every src1-related encoding field.
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let update_exec_mask = 0;
  let update_pred = 0;
  let HasNativeOperands = 1;
  let Op1 = 1;
  let ALUInst = 1;
  // $literal is an operand of the MachineInstr but is not part of the
  // 64-bit encoding below.
  let DisableEncoding = "$literal";
  let UseNamedOperandTable = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// R600_1OP whose selection pattern is simply `node` applied to $src0.
// The itinerary is forwarded to R600_1OP; previously the `itin` argument was
// accepted but dropped, so every helper-based instruction silently got the
// R600_1OP default.
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                    InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;

// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
// ALU instruction with two source registers.
//   inst    - 11-bit opcode handed to R600ALU_Word1_OP2.
//   opName  - mnemonic placed at the front of the asm string.
//   pattern - selection pattern(s), may be empty.
//   itin    - scheduling itinerary, AnyALU unless overridden.
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                   OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                   "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
                   "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op2 = 1;
  let ALUInst = 1;
  // $literal is an operand of the MachineInstr but is not part of the
  // 64-bit encoding below.
  let DisableEncoding = "$literal";
  let UseNamedOperandTable = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// R600_2OP whose selection pattern is simply `node` applied to $src0, $src1.
// The itinerary is forwarded to R600_2OP; previously the parameter was
// misspelled `itim` and never used. Template arguments are passed by
// position, so correcting the spelling does not affect any instantiation.
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itin
>;

// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
// ALU instruction with three source registers.
//   inst    - 5-bit opcode handed to R600ALU_Word1_OP3.
//   opName  - mnemonic placed at the front of the asm string.
//   pattern - selection pattern(s), may be empty.
//   itin    - scheduling itinerary, AnyALU unless overridden.
// Unlike the one- and two-source classes, the ins list carries no
// WRITE/OMOD/ABS/UEM/UP operands.
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                   R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              // $pred_sel and $bank_swizzle are separated by a space, as in
              // R600_1OP and R600_2OP; they used to be concatenated directly.
              !strconcat(" ", opName, "$clamp $last $dst$dst_rel, "
                             "$src0_neg$src0$src0_rel, "
                             "$src1_neg$src1$src1_rel, "
                             "$src2_neg$src2$src2_rel, "
                             "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP3<inst>{

  let HasNativeOperands = 1;
  // $literal is an operand of the MachineInstr but is not part of the
  // 64-bit encoding below.
  let DisableEncoding = "$literal";
  let Op3 = 1;
  let UseNamedOperandTable = 1;
  let ALUInst = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// Reduction instruction writing a single R600_Reg32 result; VecALU itinerary
// by default.
// NOTE(review): `inst` is accepted but not referenced in this class.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
  InstR600 <(outs R600_Reg32:$dst),
          ins,
          asm,
          pattern,
          itin>;



} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// Immediate leaves that classify a texture-type code. Each predicate reads
// the immediate as an unsigned 32-bit value and tests it against the listed
// codes.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

def TEX_MSAA : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 14;
  }]
>;

def TEX_ARRAY_MSAA : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 15;
  }]
>;

// Control-flow RAT export instruction.
//   cfinst  - value for the cf_inst field.
//   ratinst - value for the rat_inst field.
//   ratid   - value for the rat_id field.
//   mask    - value for the comp_mask field.
// Every other encoding field is fixed below.
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
                 dag outs, dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern>,
    CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF  {

  let rat_id = ratid;
  let rat_inst = ratinst;
  let rim         = 0;
  // XXX: Have a separate instruction for non-indexed writes.
  let type        = 1;
  let rw_rel      = 0;
  let elem_size   = 0;

  let array_size  = 0;
  let comp_mask   = mask;
  let burst_count = 0;
  let vpm         = 0;
  let cf_inst = cfinst;
  let mark        = 0;
  let barrier     = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;

}

// Vertex-fetch read taking a single MEMxi source operand. Only Word1 is
// encoded here; see the notes below for the remaining words.
// NOTE(review): `buffer_id` is not referenced inside this class.
class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
      VTX_WORD1_GPR {

  // Static fields
  let DST_REL = 0;
  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
  // however, based on my testing if USE_CONST_FIELDS is set, then all
  // these fields need to be set to 0.
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 1;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 0;

  let Inst{63-32} = Word1;
  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2>  ENDIAN_SWAP = 0;
  // bits<1>  CONST_BUF_NO_STRIDE = 0;
  // bits<1>  MEGA_FETCH = 0;
  // bits<1>  ALT_CONST = 0;
  // bits<2>  BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82}    = CONST_BUF_NO_STRIDE;
  // Inst{83}    = MEGA_FETCH;
  // Inst{84}    = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;

  let VTXInst = 1;
}

// Wraps a load fragment so that it only matches when
// isConstantLoad(<the load node>, 0) returns true.
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr), (load_type node:$ptr),
  [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
>;

def load_param : LoadParamFrag<load>;
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;

// Subtarget predicates. Each string is a C++ expression evaluated against
// the current Subtarget.
def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
def isEG : Predicate<
  "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
  "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
  "!Subtarget.hasCaymanISA()">;

def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
def isEGorCayman : Predicate<"Subtarget.getGeneration() == "
                             "AMDGPUSubtarget::EVERGREEN"
                             "|| Subtarget.getGeneration() =="
                             "AMDGPUSubtarget::NORTHERN_ISLANDS">;

def isR600toCayman : Predicate<
  "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;

353//===----------------------------------------------------------------------===// 354// R600 SDNodes 355//===----------------------------------------------------------------------===// 356 357def INTERP_PAIR_XY : AMDGPUShaderInst < 358 (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), 359 (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), 360 "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1", 361 []>; 362 363def INTERP_PAIR_ZW : AMDGPUShaderInst < 364 (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1), 365 (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), 366 "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", 367 []>; 368 369def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", 370 SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, 371 [SDNPVariadic] 372>; 373 374def DOT4 : SDNode<"AMDGPUISD::DOT4", 375 SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>, 376 SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>, 377 SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>, 378 [] 379>; 380 381def COS_HW : SDNode<"AMDGPUISD::COS_HW", 382 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> 383>; 384 385def SIN_HW : SDNode<"AMDGPUISD::SIN_HW", 386 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> 387>; 388 389def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; 390 391def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; 392 393multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> { 394def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, 395 (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw), 396 (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz), 397 (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z), 398 (i32 imm:$DST_SEL_W), 399 (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID), 400 (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z), 401 (i32 imm:$COORD_TYPE_W)), 402 (inst R600_Reg128:$SRC_GPR, 403 imm:$srcx, 
imm:$srcy, imm:$srcz, imm:$srcw, 404 imm:$offsetx, imm:$offsety, imm:$offsetz, 405 imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z, 406 imm:$DST_SEL_W, 407 imm:$RESOURCE_ID, imm:$SAMPLER_ID, 408 imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z, 409 imm:$COORD_TYPE_W)>; 410} 411 412//===----------------------------------------------------------------------===// 413// Interpolation Instructions 414//===----------------------------------------------------------------------===// 415 416def INTERP_VEC_LOAD : AMDGPUShaderInst < 417 (outs R600_Reg128:$dst), 418 (ins i32imm:$src0), 419 "INTERP_LOAD $src0 : $dst", 420 [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; 421 422def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { 423 let bank_swizzle = 5; 424} 425 426def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> { 427 let bank_swizzle = 5; 428} 429 430def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; 431 432//===----------------------------------------------------------------------===// 433// Export Instructions 434//===----------------------------------------------------------------------===// 435 436def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; 437 438def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, 439 [SDNPHasChain, SDNPSideEffect]>; 440 441class ExportWord0 { 442 field bits<32> Word0; 443 444 bits<13> arraybase; 445 bits<2> type; 446 bits<7> gpr; 447 bits<2> elem_size; 448 449 let Word0{12-0} = arraybase; 450 let Word0{14-13} = type; 451 let Word0{21-15} = gpr; 452 let Word0{22} = 0; // RW_REL 453 let Word0{29-23} = 0; // INDEX_GPR 454 let Word0{31-30} = elem_size; 455} 456 457class ExportSwzWord1 { 458 field bits<32> Word1; 459 460 bits<3> sw_x; 461 bits<3> sw_y; 462 bits<3> sw_z; 463 bits<3> sw_w; 464 bits<1> eop; 465 bits<8> inst; 466 467 let Word1{2-0} = sw_x; 468 let Word1{5-3} = sw_y; 469 let Word1{8-6} = sw_z; 470 let Word1{11-9} = sw_w; 471} 472 473class ExportBufWord1 { 474 field bits<32> Word1; 475 476 bits<12> 
arraySize; 477 bits<4> compMask; 478 bits<1> eop; 479 bits<8> inst; 480 481 let Word1{11-0} = arraySize; 482 let Word1{15-12} = compMask; 483} 484 485multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { 486 def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), 487 (ExportInst 488 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 489 0, 61, 0, 7, 7, 7, cf_inst, 0) 490 >; 491 492 def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), 493 (ExportInst 494 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 495 0, 61, 7, 0, 7, 7, cf_inst, 0) 496 >; 497 498 def : Pat<(int_R600_store_dummy (i32 imm:$type)), 499 (ExportInst 500 (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0) 501 >; 502 503 def : Pat<(int_R600_store_dummy 1), 504 (ExportInst 505 (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) 506 >; 507 508 def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), 509 (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), 510 (ExportInst R600_Reg128:$src, imm:$type, imm:$base, 511 imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) 512 >; 513 514} 515 516multiclass SteamOutputExportPattern<Instruction ExportInst, 517 bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { 518// Stream0 519 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), 520 (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), 521 (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 522 4095, imm:$mask, buf0inst, 0)>; 523// Stream1 524 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), 525 (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), 526 (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 527 4095, imm:$mask, buf1inst, 0)>; 528// Stream2 529 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), 530 (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), 531 (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 532 4095, imm:$mask, buf2inst, 0)>; 533// 
Stream3 534 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), 535 (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), 536 (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 537 4095, imm:$mask, buf3inst, 0)>; 538} 539 540// Export Instructions should not be duplicated by TailDuplication pass 541// (which assumes that duplicable instruction are affected by exec mask) 542let usesCustomInserter = 1, isNotDuplicable = 1 in { 543 544class ExportSwzInst : InstR600ISA<( 545 outs), 546 (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, 547 RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst, 548 i32imm:$eop), 549 !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"), 550 []>, ExportWord0, ExportSwzWord1 { 551 let elem_size = 3; 552 let Inst{31-0} = Word0; 553 let Inst{63-32} = Word1; 554 let IsExport = 1; 555} 556 557} // End usesCustomInserter = 1 558 559class ExportBufInst : InstR600ISA<( 560 outs), 561 (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, 562 i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop), 563 !strconcat("EXPORT", " $gpr"), 564 []>, ExportWord0, ExportBufWord1 { 565 let elem_size = 0; 566 let Inst{31-0} = Word0; 567 let Inst{63-32} = Word1; 568 let IsExport = 1; 569} 570 571//===----------------------------------------------------------------------===// 572// Control Flow Instructions 573//===----------------------------------------------------------------------===// 574 575 576def KCACHE : InstFlag<"printKCache">; 577 578class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), 579(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, 580KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, 581i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, 582i32imm:$COUNT, i32imm:$Enabled), 583!strconcat(OpName, " $COUNT, @$ADDR, " 584"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"), 585[] >, CF_ALU_WORD0, CF_ALU_WORD1 { 586 field bits<64> Inst; 587 588 let CF_INST = inst; 589 let ALT_CONST = 0; 590 let WHOLE_QUAD_MODE = 0; 591 
let BARRIER = 1; 592 let UseNamedOperandTable = 1; 593 594 let Inst{31-0} = Word0; 595 let Inst{63-32} = Word1; 596} 597 598class CF_WORD0_R600 { 599 field bits<32> Word0; 600 601 bits<32> ADDR; 602 603 let Word0 = ADDR; 604} 605 606class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), 607ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { 608 field bits<64> Inst; 609 bits<4> CNT; 610 611 let CF_INST = inst; 612 let BARRIER = 1; 613 let CF_CONST = 0; 614 let VALID_PIXEL_MODE = 0; 615 let COND = 0; 616 let COUNT = CNT{2-0}; 617 let CALL_COUNT = 0; 618 let COUNT_3 = CNT{3}; 619 let END_OF_PROGRAM = 0; 620 let WHOLE_QUAD_MODE = 0; 621 622 let Inst{31-0} = Word0; 623 let Inst{63-32} = Word1; 624} 625 626class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), 627ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { 628 field bits<64> Inst; 629 630 let CF_INST = inst; 631 let BARRIER = 1; 632 let JUMPTABLE_SEL = 0; 633 let CF_CONST = 0; 634 let VALID_PIXEL_MODE = 0; 635 let COND = 0; 636 let END_OF_PROGRAM = 0; 637 638 let Inst{31-0} = Word0; 639 let Inst{63-32} = Word1; 640} 641 642def CF_ALU : ALU_CLAUSE<8, "ALU">; 643def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; 644def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">; 645 646def FETCH_CLAUSE : AMDGPUInst <(outs), 647(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { 648 field bits<8> Inst; 649 bits<8> num; 650 let Inst = num; 651} 652 653def ALU_CLAUSE : AMDGPUInst <(outs), 654(ins i32imm:$addr), "ALU clause starting at $addr:", [] > { 655 field bits<8> Inst; 656 bits<8> num; 657 let Inst = num; 658} 659 660def LITERALS : AMDGPUInst <(outs), 661(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { 662 field bits<64> Inst; 663 bits<32> literal1; 664 bits<32> literal2; 665 666 let Inst{31-0} = literal1; 667 let Inst{63-32} = literal2; 668} 669 670def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { 671 field bits<64> Inst; 
672} 673 674let Predicates = [isR600toCayman] in { 675 676//===----------------------------------------------------------------------===// 677// Common Instructions R600, R700, Evergreen, Cayman 678//===----------------------------------------------------------------------===// 679 680def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; 681// Non-IEEE MUL: 0 * anything = 0 682def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; 683def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; 684def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; 685def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; 686 687// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, 688// so some of the instruction names don't match the asm string. 689// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. 690def SETE : R600_2OP < 691 0x08, "SETE", 692 [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))] 693>; 694 695def SGT : R600_2OP < 696 0x09, "SETGT", 697 [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))] 698>; 699 700def SGE : R600_2OP < 701 0xA, "SETGE", 702 [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))] 703>; 704 705def SNE : R600_2OP < 706 0xB, "SETNE", 707 [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))] 708>; 709 710def SETE_DX10 : R600_2OP < 711 0xC, "SETE_DX10", 712 [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))] 713>; 714 715def SETGT_DX10 : R600_2OP < 716 0xD, "SETGT_DX10", 717 [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))] 718>; 719 720def SETGE_DX10 : R600_2OP < 721 0xE, "SETGE_DX10", 722 [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))] 723>; 724 725def SETNE_DX10 : R600_2OP < 726 0xF, "SETNE_DX10", 727 [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))] 728>; 729 730def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; 731def TRUNC : R600_1OP_Helper 
<0x11, "TRUNC", int_AMDGPU_trunc>; 732def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; 733def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; 734def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; 735 736def MOV : R600_1OP <0x19, "MOV", []>; 737 738let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { 739 740class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < 741 (outs R600_Reg32:$dst), 742 (ins immType:$imm), 743 "", 744 [] 745>; 746 747} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 748 749def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; 750def : Pat < 751 (imm:$val), 752 (MOV_IMM_I32 imm:$val) 753>; 754 755def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; 756def : Pat < 757 (fpimm:$val), 758 (MOV_IMM_F32 fpimm:$val) 759>; 760 761def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>; 762def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>; 763def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>; 764def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>; 765 766let hasSideEffects = 1 in { 767 768def KILLGT : R600_2OP <0x2D, "KILLGT", []>; 769 770} // end hasSideEffects 771 772def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>; 773def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>; 774def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; 775def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; 776def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; 777def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; 778def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; 779def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; 780def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; 781def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; 782 783def SETE_INT : R600_2OP < 784 0x3A, "SETE_INT", 785 [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))] 786>; 787 788def SETGT_INT : R600_2OP < 789 0x3B, "SETGT_INT", 790 [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))] 791>; 792 793def SETGE_INT : R600_2OP < 794 
0x3C, "SETGE_INT", 795 [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))] 796>; 797 798def SETNE_INT : R600_2OP < 799 0x3D, "SETNE_INT", 800 [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))] 801>; 802 803def SETGT_UINT : R600_2OP < 804 0x3E, "SETGT_UINT", 805 [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))] 806>; 807 808def SETGE_UINT : R600_2OP < 809 0x3F, "SETGE_UINT", 810 [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))] 811>; 812 813def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; 814def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGE_INT", []>; 815def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>; 816def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; 817 818def CNDE_INT : R600_3OP < 819 0x1C, "CNDE_INT", 820 [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))] 821>; 822 823def CNDGE_INT : R600_3OP < 824 0x1E, "CNDGE_INT", 825 [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))] 826>; 827 828def CNDGT_INT : R600_3OP < 829 0x1D, "CNDGT_INT", 830 [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))] 831>; 832 833//===----------------------------------------------------------------------===// 834// Texture instructions 835//===----------------------------------------------------------------------===// 836 837let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { 838 839class R600_TEX <bits<11> inst, string opName> : 840 InstR600 <(outs R600_Reg128:$DST_GPR), 841 (ins R600_Reg128:$SRC_GPR, 842 RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw, 843 i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz, 844 RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W, 845 i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, 846 CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z, 847 CT:$COORD_TYPE_W), 848 !strconcat(opName, 849 " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, " 850 "$SRC_GPR.$srcx$srcy$srcz$srcw " 851 "RID:$RESOURCE_ID 
SID:$SAMPLER_ID " 852 "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"), 853 [], 854 NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { 855 let Inst{31-0} = Word0; 856 let Inst{63-32} = Word1; 857 858 let TEX_INST = inst{4-0}; 859 let SRC_REL = 0; 860 let DST_REL = 0; 861 let LOD_BIAS = 0; 862 863 let INST_MOD = 0; 864 let FETCH_WHOLE_QUAD = 0; 865 let ALT_CONST = 0; 866 let SAMPLER_INDEX_MODE = 0; 867 let RESOURCE_INDEX_MODE = 0; 868 869 let TEXInst = 1; 870} 871 872} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 873 874 875 876def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">; 877def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">; 878def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">; 879def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">; 880def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">; 881def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">; 882def TEX_LD : R600_TEX <0x03, "TEX_LD">; 883def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> { 884 let INST_MOD = 1; 885} 886def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">; 887def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">; 888def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">; 889def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">; 890def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">; 891def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">; 892def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">; 893 894defm : TexPattern<0, TEX_SAMPLE>; 895defm : TexPattern<1, TEX_SAMPLE_C>; 896defm : TexPattern<2, TEX_SAMPLE_L>; 897defm : TexPattern<3, TEX_SAMPLE_C_L>; 898defm : TexPattern<4, TEX_SAMPLE_LB>; 899defm : TexPattern<5, TEX_SAMPLE_C_LB>; 900defm : TexPattern<6, TEX_LD, v4i32>; 901defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>; 902defm : TexPattern<8, TEX_GET_GRADIENTS_H>; 903defm : TexPattern<9, TEX_GET_GRADIENTS_V>; 904defm : TexPattern<10, TEX_LDPTR, v4i32>; 905 
906//===----------------------------------------------------------------------===// 907// Helper classes for common instructions 908//===----------------------------------------------------------------------===// 909 910class MUL_LIT_Common <bits<5> inst> : R600_3OP < 911 inst, "MUL_LIT", 912 [] 913>; 914 915class MULADD_Common <bits<5> inst> : R600_3OP < 916 inst, "MULADD", 917 [] 918>; 919 920class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < 921 inst, "MULADD_IEEE", 922 [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] 923>; 924 925class CNDE_Common <bits<5> inst> : R600_3OP < 926 inst, "CNDE", 927 [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] 928>; 929 930class CNDGT_Common <bits<5> inst> : R600_3OP < 931 inst, "CNDGT", 932 [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))] 933> { 934 let Itinerary = VecALU; 935} 936 937class CNDGE_Common <bits<5> inst> : R600_3OP < 938 inst, "CNDGE", 939 [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))] 940> { 941 let Itinerary = VecALU; 942} 943 944 945let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { 946class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins 947// Slot X 948 UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X, 949 OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X, 950 R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X, 951 R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X, 952 R600_Pred:$pred_sel_X, 953// Slot Y 954 UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y, 955 OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y, 956 R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y, 957 R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y, 958 R600_Pred:$pred_sel_Y, 959// Slot Z 960 UEM:$update_exec_mask_Z, 
UP:$update_pred_Z, WRITE:$write_Z, 961 OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z, 962 R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z, 963 R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z, 964 R600_Pred:$pred_sel_Z, 965// Slot W 966 UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W, 967 OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W, 968 R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W, 969 R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W, 970 R600_Pred:$pred_sel_W, 971 LITERAL:$literal0, LITERAL:$literal1), 972 "", 973 pattern, 974 AnyALU> { 975 976 let UseNamedOperandTable = 1; 977 978} 979} 980 981def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4 982 R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X, 983 R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y, 984 R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z, 985 R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>; 986 987 988class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>; 989 990 991let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { 992multiclass CUBE_Common <bits<11> inst> { 993 994 def _pseudo : InstR600 < 995 (outs R600_Reg128:$dst), 996 (ins R600_Reg128:$src0), 997 "CUBE $dst $src0", 998 [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))], 999 VecALU 1000 > { 1001 let isPseudo = 1; 1002 let UseNamedOperandTable = 1; 1003 } 1004 1005 def _real : R600_2OP <inst, "CUBE", []>; 1006} 1007} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 1008 1009class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < 1010 inst, "EXP_IEEE", fexp2 1011> { 1012 let Itinerary = TransALU; 1013} 1014 1015class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < 1016 inst, "FLT_TO_INT", fp_to_sint 1017> { 1018 let Itinerary = TransALU; 1019} 1020 1021class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < 1022 inst, "INT_TO_FLT", sint_to_fp 1023> { 
let Itinerary = TransALU;
}

// FLT_TO_UINT: selected for fp_to_uint; uses the TransALU itinerary.
class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "FLT_TO_UINT", fp_to_uint
> {
  let Itinerary = TransALU;
}

// UINT_TO_FLT: selected for uint_to_fp; uses the TransALU itinerary.
class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "UINT_TO_FLT", uint_to_fp
> {
  let Itinerary = TransALU;
}

// LOG_CLAMPED: encoding only, no selection pattern.
class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
  inst, "LOG_CLAMPED", []
>;

// LOG_IEEE: selected for flog2; uses the TransALU itinerary.
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "LOG_IEEE", flog2
> {
  let Itinerary = TransALU;
}

// Shifts: selected for shl / srl / sra respectively.
class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;

// MULHI_INT: selected for mulhs; uses the TransALU itinerary.
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI_INT", mulhs
> {
  let Itinerary = TransALU;
}

// Selected for mulhu; uses the TransALU itinerary.
// NOTE(review): the asm name is "MULHI" while every sibling class prints its
// full opcode name (e.g. "MULHI_INT"); confirm whether "MULHI_UINT" was
// intended before changing it, since it alters the printed assembly.
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI", mulhu
> {
  let Itinerary = TransALU;
}

// MULLO_INT: selected for mul; uses the TransALU itinerary.
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULLO_INT", mul
> {
  let Itinerary = TransALU;
}

// MULLO_UINT: encoding only, no selection pattern.
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
  let Itinerary = TransALU;
}

// RECIP_CLAMPED: encoding only, no selection pattern.
class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
  inst, "RECIP_CLAMPED", []
> {
  let Itinerary = TransALU;
}

// RECIP_IEEE: selected for (fdiv FP_ONE, x).
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
  inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
> {
  let Itinerary = TransALU;
}

// RECIP_UINT: selected for AMDGPUurecip.
class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "RECIP_UINT", AMDGPUurecip
> {
  let Itinerary = TransALU;
}

// RECIPSQRT_CLAMPED: selected for int_AMDGPU_rsq.
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
> {
  let Itinerary = TransALU;
}

class
RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
  inst, "RECIPSQRT_IEEE", []
> {
  let Itinerary = TransALU;
}

// SIN: selected for SIN_HW; sets the Trig flag.
class SIN_Common <bits<11> inst> : R600_1OP <
  inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
  let Trig = 1;
  let Itinerary = TransALU;
}

// COS: selected for COS_HW; sets the Trig flag.
class COS_Common <bits<11> inst> : R600_1OP <
  inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
  let Trig = 1;
  let Itinerary = TransALU;
}

//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//

// Rewrites both int_AMDGPU_div and fdiv on f32 as
// MUL_IEEE(src0, recip_ieee(src1)), using the generation-specific reciprocal
// instruction passed in as `recip_ieee`.
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
  (int_AMDGPU_div f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;

def : Pat<
  (fdiv f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;
}

// Expands int_TGSI_lit_z(x, y, w) to
// exp_ieee(mul_lit(log_clamped(MAX(y, 0.0)), w, x)).
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
  : Pat <
  (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;

//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR600] in {

  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  def MULADD_r600 : MULADD_Common<0x10>;
  def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
  def CNDE_r600 : CNDE_Common<0x18>;
  def CNDGT_r600 : CNDGT_Common<0x19>;
  def CNDGE_r600 : CNDGE_Common<0x1A>;
  def DOT4_r600 : DOT4_Common<0x50>;
  defm CUBE_r600 : CUBE_Common<0x52>;
  def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600 :
RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600 : SIN_Common<0x6E>;
  def COS_r600 : COS_Common<0x6F>;
  def ASHR_r600 : ASHR_Common<0x70>;
  def LSHR_r600 : LSHR_Common<0x71>;
  def LSHL_r600 : LSHL_Common<0x72>;
  def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;

  // Division, pow and TGSI lit.z expansions built on the R600 opcodes above.
  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
  def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

  // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;

  // R600 layout of the second export word (Word1 bits 17..31).
  def R600_ExportSwz : ExportSwzInst {
    let Word1{20-17} = 0; // BURST_COUNT
    let Word1{21} = eop;
    let Word1{22} = 0; // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31} = 1; // BARRIER
  }
  defm : ExportPattern<R600_ExportSwz, 39>;

  def R600_ExportBuf : ExportBufInst {
    let Word1{20-17} = 0; // BURST_COUNT
    let Word1{21} = eop;
    let Word1{22} = 0; // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31} = 1; // BARRIER
  }
  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;

  // Control-flow clause instructions (R600 encoding).
  def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
  "TEX $CNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
  "VTX $CNT @$ADDR"> {
    let POP_COUNT
= 0;
  }
  // The clauses below take only the operands they print; the remaining
  // encoding fields are pinned to 0.
  def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
  "LOOP_START_DX10 @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
  "LOOP_BREAK @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
  "CONTINUE @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "JUMP @$ADDR POP:$POP_COUNT"> {
    let CNT = 0;
  }
  def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "ELSE @$ADDR POP:$POP_COUNT"> {
    let CNT = 0;
  }
  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let CNT = 0;
    let POP_COUNT = 0;
  }
  def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "POP @$ADDR POP:$POP_COUNT"> {
    let CNT = 0;
  }
  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
    let CNT = 0;
    let POP_COUNT = 0;
    let ADDR = 0;
    let END_OF_PROGRAM = 1;
  }

} // End Predicates = [isR600]

//===----------------------------------------------------------------------===//
// R700 Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR700] in {
  def SIN_r700 : SIN_Common<0x6E>;
  def COS_r700 : COS_Common<0x6F>;
}

//===----------------------------------------------------------------------===//
// Evergreen / Cayman store instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
string name, list<dag> pattern>
  : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
               "MEM_RAT_CACHELESS "#name, pattern>;

// MEM_RAT variant of EG_CF_RAT (first argument 0x56); the mask argument is
// always 0xf.
class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
                  list<dag> pattern>
  : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
               "MEM_RAT "#name, pattern>;

// MSKOR: selected for mskor_global on a v4i32 value; eop is pinned to 0.
def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
  "MSKOR $rw_gpr.XW, $index_gpr",
  [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
> {
  let eop = 0;
}

} // End Predicates = [isEGorCayman]


//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEG] in {

def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;

def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;

def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
// fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;

//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//

let usesCustomInserter = 1 in {

// 32-bit store
def RAT_WRITE_CACHELESS_32_eg :
CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr, $index_gpr, $eop",
  [(global_store i32:$rw_gpr, i32:$index_gpr)]
>;

// 64-bit store
def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
  (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
  [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
>;

// 128-bit store
def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;

} // End usesCustomInserter = 1

// Common base of the Evergreen VTX_READ_* classes: fixes the Word0 fields that
// do not vary between fetch widths and maps Word0 onto Inst{31-0}.
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
  : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR. This would allow us
  // to store vertex addresses in any channel, not just X.
let SRC_SEL_X = 0;

  let Inst{31-0} = Word0;
}

// 8-bit fetch into the X channel of $dst_gpr; Y/Z/W are masked.
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
                 (outs R600_TReg32_X:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 1;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 1; // FMT_8
}

// 16-bit fetch into the X channel of $dst_gpr; Y/Z/W are masked.
class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
                 (outs R600_TReg32_X:$dst_gpr), pattern> {
  let MEGA_FETCH_COUNT = 2;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 5; // FMT_16

}

// 32-bit fetch into the X channel of $dst_gpr; Y/Z/W are masked.
class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
                 (outs R600_TReg32_X:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 4;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $src_gpr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
let Constraints = "$src_gpr.ptr = $dst_gpr";
}

// 64-bit fetch into the X and Y channels of $dst_gpr; Z/W select 7, the value
// the narrower fetch classes label "Masked".
class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
                 (outs R600_Reg64:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 8;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 7;
  let DST_SEL_W = 7;
  let DATA_FORMAT = 0x1D; // COLOR_32_32
}

// 128-bit fetch into all four channels of $dst_gpr.
class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
                 (outs R600_Reg128:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs. We can't use
  // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//

// Parameter loads are instantiated with buffer_id 0.
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
  [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
  [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
  [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
  [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
  [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// Global loads are instantiated with buffer_id 1.

// 8-bit reads
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
  [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;

// 16-bit reads
def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
  [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;

// 32-bit reads
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
  [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 64-bit reads
def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
  [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 128-bit reads
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
  [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

} // End Predicates = [isEG]

//===----------------------------------------------------------------------===//
// Evergreen / Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

  // BFE_UINT - bit_extract, an optimization for mask and shift
  // Src0 = Input
  // Src1 = Offset
  // Src2 = Width
  //
  // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
  //
  // Example Usage:
  // (Offset, Width)
  //
  // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
  // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
  // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
  // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
  def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
    [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
                                               i32:$src2))],
    VecALU
  >;
  def : BFEPattern <BFE_UINT_eg>;

  // BFI_INT has no pattern of its own; selection comes from BFIPatterns.
  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
  defm : BFIPatterns <BFI_INT_eg>;

  // (src0 * src1) + src2 where both multiplicands match U24.
  def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
    [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
  >;
  // BIT_ALIGN_INT has no pattern of its own; selection comes from ROTRPattern.
  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
  def : ROTRPattern <BIT_ALIGN_INT_eg>;

  def MULADD_eg : MULADD_Common<0x14>;
  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
  def ASHR_eg : ASHR_Common<0x15>;
  def LSHR_eg : LSHR_Common<0x16>;
  def LSHL_eg : LSHL_Common<0x17>;
  def CNDE_eg : CNDE_Common<0x19>;
  def CNDGT_eg : CNDGT_Common<0x1A>;
  def CNDGE_eg : CNDGE_Common<0x1B>;
  def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
  def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
  // src0 * src1 where both operands match U24.
  def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
    [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
  >;
  def DOT4_eg : DOT4_Common<0xBE>;
  defm CUBE_eg : CUBE_Common<0xC0>;

let hasSideEffects = 1 in {
  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
}

  def TGSI_LIT_Z_eg :
TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;

  // The inherited fp_to_sint pattern is cleared here; an explicit Pat in this
  // Predicates block routes fp_to_sint through TRUNC instead.
  def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
    let Pattern = [];
    let Itinerary = AnyALU;
  }

  def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;

  // Inherited fp_to_uint pattern cleared for the same reason as above.
  def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
    let Pattern = [];
  }

  def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;

// GROUP_BARRIER: selected for int_AMDGPU_barrier_local. It has no operands,
// so every ALU encoding field is pinned to a constant (only `last` is 1).
def GROUP_BARRIER : InstR600 <
    (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <0x54> {

  let dst = 0;
  let dst_rel = 0;
  let src0 = 0;
  let src0_rel = 0;
  let src0_neg = 0;
  let src0_abs = 0;
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let write = 0;
  let omod = 0;
  let clamp = 0;
  let last = 1;
  let bank_swizzle = 0;
  let pred_sel = 0;
  let update_exec_mask = 0;
  let update_pred = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let ALUInst = 1;
}

//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//

// Base class of the LDS instructions. The 6-bit `offset` (default 0) is
// scattered over individual bits of Word0 and Word1 as listed below.
class R600_LDS <bits<6> op, dag outs, dag ins, string asm,
                list<dag> pattern = []> :

    InstR600 <outs, ins, asm, pattern, XALU>,
    R600_ALU_LDS_Word0,
    R600LDS_Word1 {

  bits<6> offset = 0;
  let lds_op = op;

  let Word1{27} = offset{0};
  let Word1{12} = offset{1};
  let Word1{28} = offset{2};
  let Word1{31} = offset{3};
  let Word0{12} = offset{4};
  let Word0{25} = offset{5};


  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let ALUInst = 1;
  let HasNativeOperands = 1;
  let UseNamedOperandTable = 1;
}

// LDS instruction with a single source operand ($src0) and a $dst result.
class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
  lds_op,
  (outs R600_Reg32:$dst),
  (ins
R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       LAST:$last, R600_Pred:$pred_sel,
       BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last OQAP, $src0$src0_rel $pred_sel",
  pattern
  > {

  let src1 = 0;
  let src1_rel = 0;
  let src2 = 0;
  let src2_rel = 0;

  let Defs = [OQAP];
  let usesCustomInserter = 1;
  let LDS_1A = 1;
  let DisableEncoding = "$dst";
}

// LDS instruction with two source operands ($src0, $src1). `dst` is an
// optional string spliced into the asm text in front of the sources.
class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
                     string dst =""> :
    R600_LDS <
  lds_op, outs,
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
       LAST:$last, R600_Pred:$pred_sel,
       BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
  pattern
  > {

  field string BaseOp;

  let src2 = 0;
  let src2_rel = 0;
  let LDS_1A1D = 1;
}

// Two-source LDS instruction without a result; BaseOp is the mnemonic.
class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS_1A1D <lds_op, (outs), name, pattern> {
  let BaseOp = name;
}

// Two-source LDS instruction with a $dst result. It is printed as
// <name>_RET with an "OQAP, " prefix, while BaseOp keeps the unsuffixed name.
class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {

  let BaseOp = name;
  let usesCustomInserter = 1;
  let DisableEncoding = "$dst";
  let Defs = [OQAP];
}

// LDS instruction with three source operands and no result.
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS <
  lds_op,
  (outs),
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
       R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
       LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
  // Keep a space between the mnemonic and $last, as R600_LDS_1A and
  // R600_LDS_1A1D do; without it the two run together in the printed asm.
  " "#name#" $last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
  pattern> {
  let LDS_1A2D = 1;
}

def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
def LDS_SUB : R600_LDS_1A1D_NORET <0x1,
"LDS_SUB", [] >;
// Local stores: $src0 is the address operand, $src1 the value.
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
  [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
  [(truncstorei8_local i32:$src1, i32:$src0)]
>;
def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
  [(truncstorei16_local i32:$src1, i32:$src0)]
>;
// Local atomics that return a value.
def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
  [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
>;
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
  [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
>;
// Local loads (plain, sign-extending and az_ext variants).
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
  [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
  [(set i32:$dst, (sextloadi8_local i32:$src0))]
>;
def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
  [(set i32:$dst, (az_extloadi8_local i32:$src0))]
>;
def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
  [(set i32:$dst, (sextloadi16_local i32:$src0))]
>;
def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
  [(set i32:$dst, (az_extloadi16_local i32:$src0))]
>;

  // TRUNC is used for the FLT_TO_INT instructions to work around a
  // perceived problem where the rounding modes are applied differently
  // depending on the instruction and the slot they are in.
  // See:
  // https://bugs.freedesktop.org/show_bug.cgi?id=50232
  // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
  //
  // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
  // which do not need to be truncated since the fp values are 0.0f or 1.0f.
  // We should look into handling these cases separately.
def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;

  def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;

  // SHA-256 Patterns
  def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;

  // Evergreen layout of the second export word (Word1 bits 16..31).
  def EG_ExportSwz : ExportSwzInst {
    let Word1{19-16} = 0; // BURST_COUNT
    let Word1{20} = 0; // VALID_PIXEL_MODE
    let Word1{21} = eop;
    let Word1{29-22} = inst;
    let Word1{30} = 0; // MARK
    let Word1{31} = 1; // BARRIER
  }
  defm : ExportPattern<EG_ExportSwz, 83>;

  def EG_ExportBuf : ExportBufInst {
    let Word1{19-16} = 0; // BURST_COUNT
    let Word1{20} = 0; // VALID_PIXEL_MODE
    let Word1{21} = eop;
    let Word1{29-22} = inst;
    let Word1{30} = 0; // MARK
    let Word1{31} = 1; // BARRIER
  }
  defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;

  // Control-flow clause instructions (Evergreen encoding). Each clause takes
  // only the operands it prints; the remaining fields are pinned to 0.
  def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
  "TEX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
  "VTX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
  "LOOP_START_DX10 @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
  "LOOP_BREAK @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
  "CONTINUE @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "JUMP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "ELSE @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_CALL_FS_EG :
CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "POP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
    let COUNT = 0;
    let POP_COUNT = 0;
    let ADDR = 0;
    let END_OF_PROGRAM = 1;
  }

} // End Predicates = [isEGorCayman]

//===----------------------------------------------------------------------===//
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//

defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;

//===----------------------------------------------------------------------===//
// Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isCayman] in {

// (src0 * src1) + src2 where both multiplicands match I24.
def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
  [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU
>;
// src0 * src1 where both operands match I24.
def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
  [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU
>;

// Every def in this group is instantiated with isVector = 1.
let isVector = 1 in {

def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;

def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1

def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;

defm DIV_cm :
DIV_Common<RECIP_IEEE_cm>;

// RECIP_UINT emulation for Cayman
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
  (AMDGPUurecip i32:$src0),
  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;

  // Cayman CF_END is its own clause instruction (32) and sets no
  // END_OF_PROGRAM field, unlike CF_END_EG.
  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
    let ADDR = 0;
    let POP_COUNT = 0;
    let COUNT = 0;
  }

// fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;

// Cayman global store, parameterised by the stored register class, value
// type and the mask argument passed on to CF_MEM_RAT_CACHELESS.
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
  CF_MEM_RAT_CACHELESS <0x14, 0, mask,
                        (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
                        "STORE_DWORD $rw_gpr, $index_gpr",
                        [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
  let eop = 0; // This bit is not used on Cayman.
}

def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;

// Common base of the Cayman VTX_READ_* classes: fixes the Word0 fields that
// do not vary between fetch widths and maps Word0 onto Inst{31-0}.
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
  : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR. This would allow us
  // to store vertex addresses in any channel, not just X.
let SRC_SEL_X = 0;
  let SRC_SEL_Y = 0;
  let STRUCTURED_READ = 0;
  let LDS_REQ = 0;
  let COALESCED_READ = 0;

  let Inst{31-0} = Word0;
}

// 8-bit fetch into the X channel of $dst_gpr; Y/Z/W are masked.
class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
                 (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 1; // FMT_8
}

// 16-bit fetch into the X channel of $dst_gpr; Y/Z/W are masked.
class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
                 (outs R600_TReg32_X:$dst_gpr), pattern> {
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 5; // FMT_16

}

// 32-bit fetch into the X channel of $dst_gpr; Y/Z/W are masked.
class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
                 (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $src_gpr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
let Constraints = "$src_gpr.ptr = $dst_gpr";
}

// 64-bit fetch into the X and Y channels of $dst_gpr; Z/W select 7, the value
// the narrower fetch classes label "Masked".
// The asm string prints the .XY channel suffix, matching VTX_READ_64_eg and
// the .XYZW suffix printed by VTX_READ_128_cm.
class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
                 (outs R600_Reg64:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 7;
  let DST_SEL_W = 7;
  let DATA_FORMAT = 0x1D; // COLOR_32_32
}

// 128-bit fetch into all four channels of $dst_gpr.
class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
                 (outs R600_Reg128:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs. We can't use
  // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//

// Parameter loads are instantiated with buffer_id 0.
def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
  [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
  [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
  [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
  [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
  [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// Global loads are instantiated with buffer_id 1.

// 8-bit reads
def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
  [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;

// 16-bit reads
def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1,
  [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;

// 32-bit reads
def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
  [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 64-bit reads
def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1,
  [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 128-bit reads
def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
  [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

} // End Predicates = [isCayman]

//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//


// Pattern-less instruction taking a single 32-bit GPR source.
def IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src),
                                 "IF_PREDICATE_SET $src", []>;

//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//

let isPseudo = 1 in {

// Produces a predicate bit.  FlagOperandIdx = 3 is the position of $flags
// when counting $dst as operand 0.
def PRED_X : InstR600 <
  (outs R600_Predicate_Bit:$dst),
  (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
  "", [], NullALU> {
  let FlagOperandIdx = 3;
}

let isTerminator = 1, isBranch = 1 in {

// Branch to $target, guarded by predicate bit $p.
def JUMP_COND : InstR600 <
  (outs),
  (ins brtarget:$target, R600_Predicate_Bit:$p),
  "JUMP $target ($p)",
  [], AnyALU
>;

// Unconditional branch to $target; marked as a barrier and predicable.
def JUMP : InstR600 <
  (outs),
  (ins brtarget:$target),
  "JUMP $target",
  [], AnyALU
> {
  let isPredicable = 1;
  let isBarrier = 1;
}

} // End isTerminator = 1, isBranch = 1

let usesCustomInserter = 1 in {

let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {

// Wraps the result of the KILLGT instructions built by the KIL/KILP patterns
// further down in this file.
def MASK_WRITE : AMDGPUShaderInst <
  (outs),
  (ins R600_Reg32:$src),
  "MASK_WRITE $src",
  []
>;

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1


// Selected for int_AMDGPU_txd with an arbitrary immediate texture target.
def TXD: InstR600 <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
                     imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
  NullALU > {
  let TEXInst = 1;
}

// Same intrinsic as TXD, but only matches when the texture target satisfies
// the TEX_SHADOW operand predicate.
def TXD_SHADOW: InstR600 <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
        imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
  NullALU
> {
  let TEXInst = 1;
}
} // End usesCustomInserter = 1
} // End isPseudo = 1

def CLAMP_R600 :  CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
    usesCustomInserter = 1 in {
  def RETURN          : ILFormat<(outs), (ins variable_ops),
                                 "RETURN", [(IL_retflag)]>;
}


//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//

let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
// Pseudo selected for a CONST_ADDRESS node whose address matches
// ADDRGA_CONST_OFFSET.
def CONST_COPY : Instruction {
  let OutOperandList = (outs R600_Reg32:$dst);
  let InOperandList = (ins i32imm:$src);
  let Pattern =
      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
  let AsmString = "CONST_COPY";
  let neverHasSideEffects = 1;
  let isAsCheapAsAMove = 1;
  let Itinerary = NullALU;
}
} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"

// Vertex fetch selected for a CONST_ADDRESS node whose address matches
// ADDRGA_VAR_OFFSET; loads a v4i32 from buffer $BUFFER_ID.
def TEX_VTX_CONSTBUF :
  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID),
      "VTX_READ_eg $dst, $ptr",
      [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr,
                                       (i32 imm:$BUFFER_ID)))]>,
  VTX_WORD1_GPR, VTX_WORD0_eg {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 2;
  let FORMAT_COMP_ALL = 1;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  // NOTE(review): 35 == 0x23; presumably COLOR_32_32_32_32_FLOAT -- confirm
  // against the ISA documentation.
  let DATA_FORMAT = 35;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2>  ENDIAN_SWAP = 0;
  // bits<1>  CONST_BUF_NO_STRIDE = 0;
  // bits<1>  MEGA_FETCH = 0;
  // bits<1>  ALT_CONST = 0;
  // bits<2>  BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82}    = CONST_BUF_NO_STRIDE;
  // Inst{83}    = MEGA_FETCH;
  // Inst{84}    = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;
  let VTXInst = 1;
}

// Vertex fetch selected for int_R600_load_texbuf; loads a v4f32 from buffer
// $BUFFER_ID.  Unlike TEX_VTX_CONSTBUF it sets USE_CONST_FIELDS = 1 and
// leaves the format fields at 0.
def TEX_VTX_TEXBUF:
  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID),
      "TEX_VTX_EXPLICIT_READ $dst, $ptr",
      [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr,
                                              imm:$BUFFER_ID))]>,
  VTX_WORD1_GPR, VTX_WORD0_eg {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 1;
  let NUM_FORMAT_ALL = 0;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2>  ENDIAN_SWAP = 0;
  // bits<1>  CONST_BUF_NO_STRIDE = 0;
  // bits<1>  MEGA_FETCH = 0;
  // bits<1>  ALT_CONST = 0;
  // bits<2>  BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82}    = CONST_BUF_NO_STRIDE;
  // Inst{83}    = MEGA_FETCH;
  // Inst{84}    = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;
  let VTXInst = 1;
}



//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
      "; Pseudo unconditional branch instruction",
      [(br bb:$target)]>;
  defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}

//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
  def SWITCH      : ILFormat< (outs), (ins GPRI32:$src),
      !strconcat("SWITCH", " $src"), []>;
  def CASE        : ILFormat< (outs), (ins GPRI32:$src),
      !strconcat("CASE", " $src"), []>;
  def BREAK       : ILFormat< (outs), (ins),
      "BREAK", []>;
  def CONTINUE    : ILFormat< (outs), (ins),
      "CONTINUE", []>;
  def DEFAULT     : ILFormat< (outs), (ins),
      "DEFAULT", []>;
  def ELSE        : ILFormat< (outs), (ins),
      "ELSE", []>;
  def ENDSWITCH   : ILFormat< (outs), (ins),
      "ENDSWITCH", []>;
  def ENDMAIN     : ILFormat< (outs), (ins),
      "ENDMAIN", []>;
  def END         : ILFormat< (outs), (ins),
      "END", []>;
  def ENDFUNC     : ILFormat< (outs), (ins),
      "ENDFUNC", []>;
  def ENDIF       : ILFormat< (outs), (ins),
      "ENDIF", []>;
  def WHILELOOP   : ILFormat< (outs), (ins),
      "WHILE", []>;
  def ENDLOOP     : ILFormat< (outs), (ins),
      "ENDLOOP", []>;
  def FUNC        : ILFormat< (outs), (ins),
      "FUNC", []>;
  def RETDYN      : ILFormat< (outs), (ins),
      "RET_DYN", []>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALZ  : BranchInstr<"BREAK_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALZ  : BranchInstr<"CONTINUE_LOGICALZ">;
  defm IFC         : BranchInstr2<"IFC">;
  defm BREAKC      : BranchInstr2<"BREAKC">;
  defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
}

//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//

// CND*_INT Patterns for f32 True / False values

// Maps a selectcc that compares an i32 against 0 and chooses between two f32
// values onto the integer conditional-move instruction `cnd`.
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
  (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
  (cnd $src0, $src1, $src2)
>;

def : CND_INT_f32 <CNDE_INT,  SETEQ>;
def : CND_INT_f32 <CNDGT_INT, SETGT>;
def : CND_INT_f32 <CNDGE_INT, SETGE>;

// CNDGE_INT extra pattern
// For signed integers, (x > -1) is the same test as (x >= 0), so this
// comparison can also be selected to CNDGE_INT.
def : Pat <
  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
  (CNDGE_INT $src0, $src1, $src2)
>;

// KIL Patterns
// KILP takes no operand and compares the constants ONE and ZERO.
def KILP : Pat <
  (int_AMDGPU_kilp),
  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;

// KIL compares ZERO against the supplied f32 operand.
def KIL : Pat <
  (int_AMDGPU_kill f32:$src0),
  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;

// Element access and construction for 4-wide vectors.
def : Extract_Element <f32, v4f32, 0, sub0>;
def : Extract_Element <f32, v4f32, 1, sub1>;
def : Extract_Element <f32, v4f32, 2, sub2>;
def : Extract_Element <f32, v4f32, 3, sub3>;

def : Insert_Element <f32, v4f32, 0, sub0>;
def : Insert_Element <f32, v4f32, 1, sub1>;
def : Insert_Element <f32, v4f32, 2, sub2>;
def : Insert_Element <f32, v4f32, 3, sub3>;

def : Extract_Element <i32, v4i32, 0, sub0>;
def : Extract_Element <i32, v4i32, 1, sub1>;
def : Extract_Element <i32, v4i32, 2, sub2>;
def : Extract_Element <i32, v4i32, 3, sub3>;

def : Insert_Element <i32, v4i32, 0, sub0>;
def : Insert_Element <i32, v4i32, 1, sub1>;
def : Insert_Element <i32, v4i32, 2, sub2>;
def : Insert_Element <i32, v4i32, 3, sub3>;

def : Vector4_Build <v4f32, f32>;
def : Vector4_Build <v4i32, i32>;

// Element access for 2-wide vectors.
def : Extract_Element <f32, v2f32, 0, sub0>;
def : Extract_Element <f32, v2f32, 1, sub1>;

def : Insert_Element <f32, v2f32, 0, sub0>;
def : Insert_Element <f32, v2f32, 1, sub1>;

def : Extract_Element <i32, v2i32, 0, sub0>;
def : Extract_Element <i32, v2i32, 1, sub1>;

def : Insert_Element <i32, v2i32, 0, sub0>;
def : Insert_Element <i32, v2i32, 1, sub1>;

// bitconvert patterns

def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
def : BitConvert <v2f32, v2i32, R600_Reg64>;
def : BitConvert <v2i32, v2f32, R600_Reg64>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;
def : BitConvert <v4i32, v4f32, R600_Reg128>;

// DWORDADDR pattern
def : DwordAddrPat  <i32, R600_Reg32>;

} // End isR600toCayman Predicate

// Instruction mapping over R600_LDS_1A1D records: rows are grouped by BaseOp
// and the column is chosen by DisableEncoding, with "$dst" as the key column
// and the empty string as the value column.
def getLDSNoRetOp : InstrMapping {
  let FilterClass = "R600_LDS_1A1D";
  let RowFields = ["BaseOp"];
  let ColFields = ["DisableEncoding"];
  let KeyCol = ["$dst"];
  let ValueCols = [[""""]];
}