1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 */ 22 23#include "tgsi/tgsi_dump.h" 24#include "tgsi/tgsi_scan.h" 25#include "tgsi/tgsi_util.h" 26 27#include <set> 28 29#include "codegen/nv50_ir.h" 30#include "codegen/nv50_ir_util.h" 31#include "codegen/nv50_ir_build_util.h" 32 33namespace tgsi { 34 35class Source; 36 37static nv50_ir::operation translateOpcode(uint opcode); 38static nv50_ir::DataFile translateFile(uint file); 39static nv50_ir::TexTarget translateTexture(uint texTarg); 40static nv50_ir::SVSemantic translateSysVal(uint sysval); 41static nv50_ir::CacheMode translateCacheMode(uint qualifier); 42static nv50_ir::ImgFormat translateImgFormat(uint format); 43 44class Instruction 45{ 46public: 47 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { } 48 49 class SrcRegister 50 { 51 public: 52 SrcRegister(const struct tgsi_full_src_register *src) 53 : reg(src->Register), 54 fsr(src) 55 { } 56 57 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { } 58 59 SrcRegister(const struct tgsi_ind_register& ind) 60 : reg(tgsi_util_get_src_from_ind(&ind)), 61 fsr(NULL) 62 { } 63 64 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off) 65 { 66 struct tgsi_src_register reg; 67 memset(®, 0, sizeof(reg)); 68 reg.Index = off.Index; 69 reg.File = off.File; 70 reg.SwizzleX = off.SwizzleX; 71 reg.SwizzleY = off.SwizzleY; 72 reg.SwizzleZ = off.SwizzleZ; 73 return reg; 74 } 75 76 SrcRegister(const struct tgsi_texture_offset& off) : 77 reg(offsetToSrc(off)), 78 fsr(NULL) 79 { } 80 81 uint getFile() const { return reg.File; } 82 83 bool is2D() const { return reg.Dimension; } 84 85 bool isIndirect(int dim) const 86 { 87 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect; 88 } 89 90 int getIndex(int dim) const 91 { 92 return (dim && fsr) ? 
fsr->Dimension.Index : reg.Index; 93 } 94 95 int getSwizzle(int chan) const 96 { 97 return tgsi_util_get_src_register_swizzle(®, chan); 98 } 99 100 int getArrayId() const 101 { 102 if (isIndirect(0)) 103 return fsr->Indirect.ArrayID; 104 return 0; 105 } 106 107 nv50_ir::Modifier getMod(int chan) const; 108 109 SrcRegister getIndirect(int dim) const 110 { 111 assert(fsr && isIndirect(dim)); 112 if (dim) 113 return SrcRegister(fsr->DimIndirect); 114 return SrcRegister(fsr->Indirect); 115 } 116 117 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const 118 { 119 assert(reg.File == TGSI_FILE_IMMEDIATE); 120 assert(!reg.Absolute); 121 assert(!reg.Negate); 122 return info->immd.data[reg.Index * 4 + getSwizzle(c)]; 123 } 124 125 private: 126 const struct tgsi_src_register reg; 127 const struct tgsi_full_src_register *fsr; 128 }; 129 130 class DstRegister 131 { 132 public: 133 DstRegister(const struct tgsi_full_dst_register *dst) 134 : reg(dst->Register), 135 fdr(dst) 136 { } 137 138 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { } 139 140 uint getFile() const { return reg.File; } 141 142 bool is2D() const { return reg.Dimension; } 143 144 bool isIndirect(int dim) const 145 { 146 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect; 147 } 148 149 int getIndex(int dim) const 150 { 151 return (dim && fdr) ? 
fdr->Dimension.Dimension : reg.Index; 152 } 153 154 unsigned int getMask() const { return reg.WriteMask; } 155 156 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); } 157 158 SrcRegister getIndirect(int dim) const 159 { 160 assert(fdr && isIndirect(dim)); 161 if (dim) 162 return SrcRegister(fdr->DimIndirect); 163 return SrcRegister(fdr->Indirect); 164 } 165 166 int getArrayId() const 167 { 168 if (isIndirect(0)) 169 return fdr->Indirect.ArrayID; 170 return 0; 171 } 172 173 private: 174 const struct tgsi_dst_register reg; 175 const struct tgsi_full_dst_register *fdr; 176 }; 177 178 inline uint getOpcode() const { return insn->Instruction.Opcode; } 179 180 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; } 181 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; } 182 183 // mask of used components of source s 184 unsigned int srcMask(unsigned int s) const; 185 unsigned int texOffsetMask() const; 186 187 SrcRegister getSrc(unsigned int s) const 188 { 189 assert(s < srcCount()); 190 return SrcRegister(&insn->Src[s]); 191 } 192 193 DstRegister getDst(unsigned int d) const 194 { 195 assert(d < dstCount()); 196 return DstRegister(&insn->Dst[d]); 197 } 198 199 SrcRegister getTexOffset(unsigned int i) const 200 { 201 assert(i < TGSI_FULL_MAX_TEX_OFFSETS); 202 return SrcRegister(insn->TexOffsets[i]); 203 } 204 205 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; } 206 207 bool checkDstSrcAliasing() const; 208 209 inline nv50_ir::operation getOP() const { 210 return translateOpcode(getOpcode()); } 211 212 nv50_ir::DataType inferSrcType() const; 213 nv50_ir::DataType inferDstType() const; 214 215 nv50_ir::CondCode getSetCond() const; 216 217 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; 218 219 nv50_ir::CacheMode getCacheMode() const { 220 if (!insn->Instruction.Memory) 221 return nv50_ir::CACHE_CA; 222 return translateCacheMode(insn->Memory.Qualifier); 223 } 224 225 inline 
uint getLabel() { return insn->Label.Label; } 226 227 unsigned getSaturate() const { return insn->Instruction.Saturate; } 228 229 void print() const 230 { 231 tgsi_dump_instruction(insn, 1); 232 } 233 234private: 235 const struct tgsi_full_instruction *insn; 236}; 237 238unsigned int Instruction::texOffsetMask() const 239{ 240 const struct tgsi_instruction_texture *tex = &insn->Texture; 241 assert(insn->Instruction.Texture); 242 243 switch (tex->Texture) { 244 case TGSI_TEXTURE_BUFFER: 245 case TGSI_TEXTURE_1D: 246 case TGSI_TEXTURE_SHADOW1D: 247 case TGSI_TEXTURE_1D_ARRAY: 248 case TGSI_TEXTURE_SHADOW1D_ARRAY: 249 return 0x1; 250 case TGSI_TEXTURE_2D: 251 case TGSI_TEXTURE_SHADOW2D: 252 case TGSI_TEXTURE_2D_ARRAY: 253 case TGSI_TEXTURE_SHADOW2D_ARRAY: 254 case TGSI_TEXTURE_RECT: 255 case TGSI_TEXTURE_SHADOWRECT: 256 case TGSI_TEXTURE_2D_MSAA: 257 case TGSI_TEXTURE_2D_ARRAY_MSAA: 258 return 0x3; 259 case TGSI_TEXTURE_3D: 260 return 0x7; 261 default: 262 assert(!"Unexpected texture target"); 263 return 0xf; 264 } 265} 266 267unsigned int Instruction::srcMask(unsigned int s) const 268{ 269 unsigned int mask = insn->Dst[0].Register.WriteMask; 270 271 switch (insn->Instruction.Opcode) { 272 case TGSI_OPCODE_COS: 273 case TGSI_OPCODE_SIN: 274 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); 275 case TGSI_OPCODE_DP2: 276 return 0x3; 277 case TGSI_OPCODE_DP3: 278 return 0x7; 279 case TGSI_OPCODE_DP4: 280 case TGSI_OPCODE_DPH: 281 case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ 282 return 0xf; 283 case TGSI_OPCODE_DST: 284 return mask & (s ? 
0xa : 0x6); 285 case TGSI_OPCODE_EX2: 286 case TGSI_OPCODE_EXP: 287 case TGSI_OPCODE_LG2: 288 case TGSI_OPCODE_LOG: 289 case TGSI_OPCODE_POW: 290 case TGSI_OPCODE_RCP: 291 case TGSI_OPCODE_RSQ: 292 case TGSI_OPCODE_SCS: 293 return 0x1; 294 case TGSI_OPCODE_IF: 295 case TGSI_OPCODE_UIF: 296 return 0x1; 297 case TGSI_OPCODE_LIT: 298 return 0xb; 299 case TGSI_OPCODE_TEX2: 300 case TGSI_OPCODE_TXB2: 301 case TGSI_OPCODE_TXL2: 302 return (s == 0) ? 0xf : 0x3; 303 case TGSI_OPCODE_TEX: 304 case TGSI_OPCODE_TXB: 305 case TGSI_OPCODE_TXD: 306 case TGSI_OPCODE_TXL: 307 case TGSI_OPCODE_TXP: 308 case TGSI_OPCODE_LODQ: 309 { 310 const struct tgsi_instruction_texture *tex = &insn->Texture; 311 312 assert(insn->Instruction.Texture); 313 314 mask = 0x7; 315 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && 316 insn->Instruction.Opcode != TGSI_OPCODE_TXD) 317 mask |= 0x8; /* bias, lod or proj */ 318 319 switch (tex->Texture) { 320 case TGSI_TEXTURE_1D: 321 mask &= 0x9; 322 break; 323 case TGSI_TEXTURE_SHADOW1D: 324 mask &= 0xd; 325 break; 326 case TGSI_TEXTURE_1D_ARRAY: 327 case TGSI_TEXTURE_2D: 328 case TGSI_TEXTURE_RECT: 329 mask &= 0xb; 330 break; 331 case TGSI_TEXTURE_CUBE_ARRAY: 332 case TGSI_TEXTURE_SHADOW2D_ARRAY: 333 case TGSI_TEXTURE_SHADOWCUBE: 334 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 335 mask |= 0x8; 336 break; 337 default: 338 break; 339 } 340 } 341 return mask; 342 case TGSI_OPCODE_XPD: 343 { 344 unsigned int x = 0; 345 if (mask & 1) x |= 0x6; 346 if (mask & 2) x |= 0x5; 347 if (mask & 4) x |= 0x3; 348 return x; 349 } 350 case TGSI_OPCODE_D2I: 351 case TGSI_OPCODE_D2U: 352 case TGSI_OPCODE_D2F: 353 case TGSI_OPCODE_DSLT: 354 case TGSI_OPCODE_DSGE: 355 case TGSI_OPCODE_DSEQ: 356 case TGSI_OPCODE_DSNE: 357 switch (util_bitcount(mask)) { 358 case 1: return 0x3; 359 case 2: return 0xf; 360 default: 361 assert(!"unexpected mask"); 362 return 0xf; 363 } 364 case TGSI_OPCODE_I2D: 365 case TGSI_OPCODE_U2D: 366 case TGSI_OPCODE_F2D: { 367 unsigned int x = 0; 368 if ((mask & 
0x3) == 0x3) 369 x |= 1; 370 if ((mask & 0xc) == 0xc) 371 x |= 2; 372 return x; 373 } 374 case TGSI_OPCODE_PK2H: 375 return 0x3; 376 case TGSI_OPCODE_UP2H: 377 return 0x1; 378 default: 379 break; 380 } 381 382 return mask; 383} 384 385nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const 386{ 387 nv50_ir::Modifier m(0); 388 389 if (reg.Absolute) 390 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS); 391 if (reg.Negate) 392 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG); 393 return m; 394} 395 396static nv50_ir::DataFile translateFile(uint file) 397{ 398 switch (file) { 399 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST; 400 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT; 401 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; 402 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; 403 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; 404 case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; 405 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; 406 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; 407 case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; 408 case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL; 409 case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; 410 case TGSI_FILE_SAMPLER: 411 case TGSI_FILE_NULL: 412 default: 413 return nv50_ir::FILE_NULL; 414 } 415} 416 417static nv50_ir::SVSemantic translateSysVal(uint sysval) 418{ 419 switch (sysval) { 420 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; 421 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; 422 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; 423 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; 424 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; 425 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID; 426 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID; 427 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID; 428 case TGSI_SEMANTIC_THREAD_ID: return 
nv50_ir::SV_TID; 429 case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX; 430 case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS; 431 case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK; 432 case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID; 433 case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD; 434 case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER; 435 case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER; 436 case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT; 437 case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL; 438 case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX; 439 case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE; 440 case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID; 441 case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM; 442 default: 443 assert(0); 444 return nv50_ir::SV_CLOCK; 445 } 446} 447 448#define NV50_IR_TEX_TARG_CASE(a, b) \ 449 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b; 450 451static nv50_ir::TexTarget translateTexture(uint tex) 452{ 453 switch (tex) { 454 NV50_IR_TEX_TARG_CASE(1D, 1D); 455 NV50_IR_TEX_TARG_CASE(2D, 2D); 456 NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS); 457 NV50_IR_TEX_TARG_CASE(3D, 3D); 458 NV50_IR_TEX_TARG_CASE(CUBE, CUBE); 459 NV50_IR_TEX_TARG_CASE(RECT, RECT); 460 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY); 461 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY); 462 NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY); 463 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY); 464 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW); 465 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW); 466 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); 467 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); 468 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW); 469 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); 470 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW); 471 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); 472 473 
case TGSI_TEXTURE_UNKNOWN: 474 default: 475 assert(!"invalid texture target"); 476 return nv50_ir::TEX_TARGET_2D; 477 } 478} 479 480static nv50_ir::CacheMode translateCacheMode(uint qualifier) 481{ 482 if (qualifier & TGSI_MEMORY_VOLATILE) 483 return nv50_ir::CACHE_CV; 484 if (qualifier & TGSI_MEMORY_COHERENT) 485 return nv50_ir::CACHE_CG; 486 return nv50_ir::CACHE_CA; 487} 488 489static nv50_ir::ImgFormat translateImgFormat(uint format) 490{ 491 492#define FMT_CASE(a, b) \ 493 case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b 494 495 switch (format) { 496 FMT_CASE(NONE, NONE); 497 498 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F); 499 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F); 500 FMT_CASE(R32G32_FLOAT, RG32F); 501 FMT_CASE(R16G16_FLOAT, RG16F); 502 FMT_CASE(R11G11B10_FLOAT, R11G11B10F); 503 FMT_CASE(R32_FLOAT, R32F); 504 FMT_CASE(R16_FLOAT, R16F); 505 506 FMT_CASE(R32G32B32A32_UINT, RGBA32UI); 507 FMT_CASE(R16G16B16A16_UINT, RGBA16UI); 508 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI); 509 FMT_CASE(R8G8B8A8_UINT, RGBA8UI); 510 FMT_CASE(R32G32_UINT, RG32UI); 511 FMT_CASE(R16G16_UINT, RG16UI); 512 FMT_CASE(R8G8_UINT, RG8UI); 513 FMT_CASE(R32_UINT, R32UI); 514 FMT_CASE(R16_UINT, R16UI); 515 FMT_CASE(R8_UINT, R8UI); 516 517 FMT_CASE(R32G32B32A32_SINT, RGBA32I); 518 FMT_CASE(R16G16B16A16_SINT, RGBA16I); 519 FMT_CASE(R8G8B8A8_SINT, RGBA8I); 520 FMT_CASE(R32G32_SINT, RG32I); 521 FMT_CASE(R16G16_SINT, RG16I); 522 FMT_CASE(R8G8_SINT, RG8I); 523 FMT_CASE(R32_SINT, R32I); 524 FMT_CASE(R16_SINT, R16I); 525 FMT_CASE(R8_SINT, R8I); 526 527 FMT_CASE(R16G16B16A16_UNORM, RGBA16); 528 FMT_CASE(R10G10B10A2_UNORM, RGB10A2); 529 FMT_CASE(R8G8B8A8_UNORM, RGBA8); 530 FMT_CASE(R16G16_UNORM, RG16); 531 FMT_CASE(R8G8_UNORM, RG8); 532 FMT_CASE(R16_UNORM, R16); 533 FMT_CASE(R8_UNORM, R8); 534 535 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM); 536 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM); 537 FMT_CASE(R16G16_SNORM, RG16_SNORM); 538 FMT_CASE(R8G8_SNORM, RG8_SNORM); 539 FMT_CASE(R16_SNORM, R16_SNORM); 540 
FMT_CASE(R8_SNORM, R8_SNORM); 541 542 FMT_CASE(B8G8R8A8_UNORM, BGRA8); 543 } 544 545 assert(!"Unexpected format"); 546 return nv50_ir::FMT_NONE; 547} 548 549nv50_ir::DataType Instruction::inferSrcType() const 550{ 551 switch (getOpcode()) { 552 case TGSI_OPCODE_UIF: 553 case TGSI_OPCODE_AND: 554 case TGSI_OPCODE_OR: 555 case TGSI_OPCODE_XOR: 556 case TGSI_OPCODE_NOT: 557 case TGSI_OPCODE_SHL: 558 case TGSI_OPCODE_U2F: 559 case TGSI_OPCODE_U2D: 560 case TGSI_OPCODE_UADD: 561 case TGSI_OPCODE_UDIV: 562 case TGSI_OPCODE_UMOD: 563 case TGSI_OPCODE_UMAD: 564 case TGSI_OPCODE_UMUL: 565 case TGSI_OPCODE_UMUL_HI: 566 case TGSI_OPCODE_UMAX: 567 case TGSI_OPCODE_UMIN: 568 case TGSI_OPCODE_USEQ: 569 case TGSI_OPCODE_USGE: 570 case TGSI_OPCODE_USLT: 571 case TGSI_OPCODE_USNE: 572 case TGSI_OPCODE_USHR: 573 case TGSI_OPCODE_ATOMUADD: 574 case TGSI_OPCODE_ATOMXCHG: 575 case TGSI_OPCODE_ATOMCAS: 576 case TGSI_OPCODE_ATOMAND: 577 case TGSI_OPCODE_ATOMOR: 578 case TGSI_OPCODE_ATOMXOR: 579 case TGSI_OPCODE_ATOMUMIN: 580 case TGSI_OPCODE_ATOMUMAX: 581 case TGSI_OPCODE_UBFE: 582 case TGSI_OPCODE_UMSB: 583 case TGSI_OPCODE_UP2H: 584 case TGSI_OPCODE_VOTE_ALL: 585 case TGSI_OPCODE_VOTE_ANY: 586 case TGSI_OPCODE_VOTE_EQ: 587 return nv50_ir::TYPE_U32; 588 case TGSI_OPCODE_I2F: 589 case TGSI_OPCODE_I2D: 590 case TGSI_OPCODE_IDIV: 591 case TGSI_OPCODE_IMUL_HI: 592 case TGSI_OPCODE_IMAX: 593 case TGSI_OPCODE_IMIN: 594 case TGSI_OPCODE_IABS: 595 case TGSI_OPCODE_INEG: 596 case TGSI_OPCODE_ISGE: 597 case TGSI_OPCODE_ISHR: 598 case TGSI_OPCODE_ISLT: 599 case TGSI_OPCODE_ISSG: 600 case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version 601 case TGSI_OPCODE_MOD: 602 case TGSI_OPCODE_UARL: 603 case TGSI_OPCODE_ATOMIMIN: 604 case TGSI_OPCODE_ATOMIMAX: 605 case TGSI_OPCODE_IBFE: 606 case TGSI_OPCODE_IMSB: 607 return nv50_ir::TYPE_S32; 608 case TGSI_OPCODE_D2F: 609 case TGSI_OPCODE_D2I: 610 case TGSI_OPCODE_D2U: 611 case TGSI_OPCODE_DABS: 612 case TGSI_OPCODE_DNEG: 613 case 
TGSI_OPCODE_DADD: 614 case TGSI_OPCODE_DMUL: 615 case TGSI_OPCODE_DDIV: 616 case TGSI_OPCODE_DMAX: 617 case TGSI_OPCODE_DMIN: 618 case TGSI_OPCODE_DSLT: 619 case TGSI_OPCODE_DSGE: 620 case TGSI_OPCODE_DSEQ: 621 case TGSI_OPCODE_DSNE: 622 case TGSI_OPCODE_DRCP: 623 case TGSI_OPCODE_DSQRT: 624 case TGSI_OPCODE_DMAD: 625 case TGSI_OPCODE_DFMA: 626 case TGSI_OPCODE_DFRAC: 627 case TGSI_OPCODE_DRSQ: 628 case TGSI_OPCODE_DTRUNC: 629 case TGSI_OPCODE_DCEIL: 630 case TGSI_OPCODE_DFLR: 631 case TGSI_OPCODE_DROUND: 632 return nv50_ir::TYPE_F64; 633 default: 634 return nv50_ir::TYPE_F32; 635 } 636} 637 638nv50_ir::DataType Instruction::inferDstType() const 639{ 640 switch (getOpcode()) { 641 case TGSI_OPCODE_D2U: 642 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32; 643 case TGSI_OPCODE_D2I: 644 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32; 645 case TGSI_OPCODE_FSEQ: 646 case TGSI_OPCODE_FSGE: 647 case TGSI_OPCODE_FSLT: 648 case TGSI_OPCODE_FSNE: 649 case TGSI_OPCODE_DSEQ: 650 case TGSI_OPCODE_DSGE: 651 case TGSI_OPCODE_DSLT: 652 case TGSI_OPCODE_DSNE: 653 case TGSI_OPCODE_PK2H: 654 return nv50_ir::TYPE_U32; 655 case TGSI_OPCODE_I2F: 656 case TGSI_OPCODE_U2F: 657 case TGSI_OPCODE_D2F: 658 case TGSI_OPCODE_UP2H: 659 return nv50_ir::TYPE_F32; 660 case TGSI_OPCODE_I2D: 661 case TGSI_OPCODE_U2D: 662 case TGSI_OPCODE_F2D: 663 return nv50_ir::TYPE_F64; 664 default: 665 return inferSrcType(); 666 } 667} 668 669nv50_ir::CondCode Instruction::getSetCond() const 670{ 671 using namespace nv50_ir; 672 673 switch (getOpcode()) { 674 case TGSI_OPCODE_SLT: 675 case TGSI_OPCODE_ISLT: 676 case TGSI_OPCODE_USLT: 677 case TGSI_OPCODE_FSLT: 678 case TGSI_OPCODE_DSLT: 679 return CC_LT; 680 case TGSI_OPCODE_SLE: 681 return CC_LE; 682 case TGSI_OPCODE_SGE: 683 case TGSI_OPCODE_ISGE: 684 case TGSI_OPCODE_USGE: 685 case TGSI_OPCODE_FSGE: 686 case TGSI_OPCODE_DSGE: 687 return CC_GE; 688 case TGSI_OPCODE_SGT: 689 return CC_GT; 690 case TGSI_OPCODE_SEQ: 691 case TGSI_OPCODE_USEQ: 692 case 
TGSI_OPCODE_FSEQ: 693 case TGSI_OPCODE_DSEQ: 694 return CC_EQ; 695 case TGSI_OPCODE_SNE: 696 case TGSI_OPCODE_FSNE: 697 case TGSI_OPCODE_DSNE: 698 return CC_NEU; 699 case TGSI_OPCODE_USNE: 700 return CC_NE; 701 default: 702 return CC_ALWAYS; 703 } 704} 705 706#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b 707 708static nv50_ir::operation translateOpcode(uint opcode) 709{ 710 switch (opcode) { 711 NV50_IR_OPCODE_CASE(ARL, SHL); 712 NV50_IR_OPCODE_CASE(MOV, MOV); 713 714 NV50_IR_OPCODE_CASE(RCP, RCP); 715 NV50_IR_OPCODE_CASE(RSQ, RSQ); 716 NV50_IR_OPCODE_CASE(SQRT, SQRT); 717 718 NV50_IR_OPCODE_CASE(MUL, MUL); 719 NV50_IR_OPCODE_CASE(ADD, ADD); 720 721 NV50_IR_OPCODE_CASE(MIN, MIN); 722 NV50_IR_OPCODE_CASE(MAX, MAX); 723 NV50_IR_OPCODE_CASE(SLT, SET); 724 NV50_IR_OPCODE_CASE(SGE, SET); 725 NV50_IR_OPCODE_CASE(MAD, MAD); 726 NV50_IR_OPCODE_CASE(FMA, FMA); 727 728 NV50_IR_OPCODE_CASE(FLR, FLOOR); 729 NV50_IR_OPCODE_CASE(ROUND, CVT); 730 NV50_IR_OPCODE_CASE(EX2, EX2); 731 NV50_IR_OPCODE_CASE(LG2, LG2); 732 NV50_IR_OPCODE_CASE(POW, POW); 733 734 NV50_IR_OPCODE_CASE(COS, COS); 735 NV50_IR_OPCODE_CASE(DDX, DFDX); 736 NV50_IR_OPCODE_CASE(DDX_FINE, DFDX); 737 NV50_IR_OPCODE_CASE(DDY, DFDY); 738 NV50_IR_OPCODE_CASE(DDY_FINE, DFDY); 739 NV50_IR_OPCODE_CASE(KILL, DISCARD); 740 741 NV50_IR_OPCODE_CASE(SEQ, SET); 742 NV50_IR_OPCODE_CASE(SGT, SET); 743 NV50_IR_OPCODE_CASE(SIN, SIN); 744 NV50_IR_OPCODE_CASE(SLE, SET); 745 NV50_IR_OPCODE_CASE(SNE, SET); 746 NV50_IR_OPCODE_CASE(TEX, TEX); 747 NV50_IR_OPCODE_CASE(TXD, TXD); 748 NV50_IR_OPCODE_CASE(TXP, TEX); 749 750 NV50_IR_OPCODE_CASE(CAL, CALL); 751 NV50_IR_OPCODE_CASE(RET, RET); 752 NV50_IR_OPCODE_CASE(CMP, SLCT); 753 754 NV50_IR_OPCODE_CASE(TXB, TXB); 755 756 NV50_IR_OPCODE_CASE(DIV, DIV); 757 758 NV50_IR_OPCODE_CASE(TXL, TXL); 759 760 NV50_IR_OPCODE_CASE(CEIL, CEIL); 761 NV50_IR_OPCODE_CASE(I2F, CVT); 762 NV50_IR_OPCODE_CASE(NOT, NOT); 763 NV50_IR_OPCODE_CASE(TRUNC, TRUNC); 764 
NV50_IR_OPCODE_CASE(SHL, SHL); 765 766 NV50_IR_OPCODE_CASE(AND, AND); 767 NV50_IR_OPCODE_CASE(OR, OR); 768 NV50_IR_OPCODE_CASE(MOD, MOD); 769 NV50_IR_OPCODE_CASE(XOR, XOR); 770 NV50_IR_OPCODE_CASE(SAD, SAD); 771 NV50_IR_OPCODE_CASE(TXF, TXF); 772 NV50_IR_OPCODE_CASE(TXQ, TXQ); 773 NV50_IR_OPCODE_CASE(TXQS, TXQ); 774 NV50_IR_OPCODE_CASE(TG4, TXG); 775 NV50_IR_OPCODE_CASE(LODQ, TXLQ); 776 777 NV50_IR_OPCODE_CASE(EMIT, EMIT); 778 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART); 779 780 NV50_IR_OPCODE_CASE(KILL_IF, DISCARD); 781 782 NV50_IR_OPCODE_CASE(F2I, CVT); 783 NV50_IR_OPCODE_CASE(FSEQ, SET); 784 NV50_IR_OPCODE_CASE(FSGE, SET); 785 NV50_IR_OPCODE_CASE(FSLT, SET); 786 NV50_IR_OPCODE_CASE(FSNE, SET); 787 NV50_IR_OPCODE_CASE(IDIV, DIV); 788 NV50_IR_OPCODE_CASE(IMAX, MAX); 789 NV50_IR_OPCODE_CASE(IMIN, MIN); 790 NV50_IR_OPCODE_CASE(IABS, ABS); 791 NV50_IR_OPCODE_CASE(INEG, NEG); 792 NV50_IR_OPCODE_CASE(ISGE, SET); 793 NV50_IR_OPCODE_CASE(ISHR, SHR); 794 NV50_IR_OPCODE_CASE(ISLT, SET); 795 NV50_IR_OPCODE_CASE(F2U, CVT); 796 NV50_IR_OPCODE_CASE(U2F, CVT); 797 NV50_IR_OPCODE_CASE(UADD, ADD); 798 NV50_IR_OPCODE_CASE(UDIV, DIV); 799 NV50_IR_OPCODE_CASE(UMAD, MAD); 800 NV50_IR_OPCODE_CASE(UMAX, MAX); 801 NV50_IR_OPCODE_CASE(UMIN, MIN); 802 NV50_IR_OPCODE_CASE(UMOD, MOD); 803 NV50_IR_OPCODE_CASE(UMUL, MUL); 804 NV50_IR_OPCODE_CASE(USEQ, SET); 805 NV50_IR_OPCODE_CASE(USGE, SET); 806 NV50_IR_OPCODE_CASE(USHR, SHR); 807 NV50_IR_OPCODE_CASE(USLT, SET); 808 NV50_IR_OPCODE_CASE(USNE, SET); 809 810 NV50_IR_OPCODE_CASE(DABS, ABS); 811 NV50_IR_OPCODE_CASE(DNEG, NEG); 812 NV50_IR_OPCODE_CASE(DADD, ADD); 813 NV50_IR_OPCODE_CASE(DMUL, MUL); 814 NV50_IR_OPCODE_CASE(DDIV, DIV); 815 NV50_IR_OPCODE_CASE(DMAX, MAX); 816 NV50_IR_OPCODE_CASE(DMIN, MIN); 817 NV50_IR_OPCODE_CASE(DSLT, SET); 818 NV50_IR_OPCODE_CASE(DSGE, SET); 819 NV50_IR_OPCODE_CASE(DSEQ, SET); 820 NV50_IR_OPCODE_CASE(DSNE, SET); 821 NV50_IR_OPCODE_CASE(DRCP, RCP); 822 NV50_IR_OPCODE_CASE(DSQRT, SQRT); 823 NV50_IR_OPCODE_CASE(DMAD, MAD); 
824 NV50_IR_OPCODE_CASE(DFMA, FMA); 825 NV50_IR_OPCODE_CASE(D2I, CVT); 826 NV50_IR_OPCODE_CASE(D2U, CVT); 827 NV50_IR_OPCODE_CASE(I2D, CVT); 828 NV50_IR_OPCODE_CASE(U2D, CVT); 829 NV50_IR_OPCODE_CASE(DRSQ, RSQ); 830 NV50_IR_OPCODE_CASE(DTRUNC, TRUNC); 831 NV50_IR_OPCODE_CASE(DCEIL, CEIL); 832 NV50_IR_OPCODE_CASE(DFLR, FLOOR); 833 NV50_IR_OPCODE_CASE(DROUND, CVT); 834 835 NV50_IR_OPCODE_CASE(IMUL_HI, MUL); 836 NV50_IR_OPCODE_CASE(UMUL_HI, MUL); 837 838 NV50_IR_OPCODE_CASE(SAMPLE, TEX); 839 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); 840 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); 841 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX); 842 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); 843 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); 844 NV50_IR_OPCODE_CASE(SAMPLE_I, TXF); 845 NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF); 846 NV50_IR_OPCODE_CASE(GATHER4, TXG); 847 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); 848 849 NV50_IR_OPCODE_CASE(ATOMUADD, ATOM); 850 NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM); 851 NV50_IR_OPCODE_CASE(ATOMCAS, ATOM); 852 NV50_IR_OPCODE_CASE(ATOMAND, ATOM); 853 NV50_IR_OPCODE_CASE(ATOMOR, ATOM); 854 NV50_IR_OPCODE_CASE(ATOMXOR, ATOM); 855 NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM); 856 NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); 857 NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); 858 NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); 859 860 NV50_IR_OPCODE_CASE(TEX2, TEX); 861 NV50_IR_OPCODE_CASE(TXB2, TXB); 862 NV50_IR_OPCODE_CASE(TXL2, TXL); 863 864 NV50_IR_OPCODE_CASE(IBFE, EXTBF); 865 NV50_IR_OPCODE_CASE(UBFE, EXTBF); 866 NV50_IR_OPCODE_CASE(BFI, INSBF); 867 NV50_IR_OPCODE_CASE(BREV, EXTBF); 868 NV50_IR_OPCODE_CASE(POPC, POPCNT); 869 NV50_IR_OPCODE_CASE(LSB, BFIND); 870 NV50_IR_OPCODE_CASE(IMSB, BFIND); 871 NV50_IR_OPCODE_CASE(UMSB, BFIND); 872 873 NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE); 874 NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE); 875 NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE); 876 877 NV50_IR_OPCODE_CASE(END, EXIT); 878 879 default: 880 return nv50_ir::OP_NOP; 881 } 882} 883 884static uint16_t opcodeToSubOp(uint opcode) 885{ 886 switch (opcode) { 887 case 
TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); 888 case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); 889 case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); 890 case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; 891 case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; 892 case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; 893 case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND; 894 case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR; 895 case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR; 896 case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN; 897 case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; 898 case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; 899 case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; 900 case TGSI_OPCODE_IMUL_HI: 901 case TGSI_OPCODE_UMUL_HI: 902 return NV50_IR_SUBOP_MUL_HIGH; 903 case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL; 904 case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY; 905 case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI; 906 default: 907 return 0; 908 } 909} 910 911bool Instruction::checkDstSrcAliasing() const 912{ 913 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory 914 return false; 915 916 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) { 917 if (insn->Src[s].Register.File == TGSI_FILE_NULL) 918 break; 919 if (insn->Src[s].Register.File == insn->Dst[0].Register.File && 920 insn->Src[s].Register.Index == insn->Dst[0].Register.Index) 921 return true; 922 } 923 return false; 924} 925 926class Source 927{ 928public: 929 Source(struct nv50_ir_prog_info *); 930 ~Source(); 931 932public: 933 bool scanSource(); 934 unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; } 935 936public: 937 struct tgsi_shader_info scan; 938 struct tgsi_full_instruction *insns; 939 const struct tgsi_token *tokens; 940 struct nv50_ir_prog_info *info; 941 942 nv50_ir::DynArray tempArrays; 943 
nv50_ir::DynArray immdArrays; 944 945 typedef nv50_ir::BuildUtil::Location Location; 946 // these registers are per-subroutine, cannot be used for parameter passing 947 std::set<Location> locals; 948 949 std::set<int> indirectTempArrays; 950 std::map<int, int> indirectTempOffsets; 951 std::map<int, std::pair<int, int> > tempArrayInfo; 952 std::vector<int> tempArrayId; 953 954 int clipVertexOutput; 955 956 struct TextureView { 957 uint8_t target; // TGSI_TEXTURE_* 958 }; 959 std::vector<TextureView> textureViews; 960 961 /* 962 struct Resource { 963 uint8_t target; // TGSI_TEXTURE_* 964 bool raw; 965 uint8_t slot; // $surface index 966 }; 967 std::vector<Resource> resources; 968 */ 969 970 struct Image { 971 uint8_t target; // TGSI_TEXTURE_* 972 bool raw; 973 uint8_t slot; 974 uint16_t format; // PIPE_FORMAT_* 975 }; 976 std::vector<Image> images; 977 978 struct MemoryFile { 979 uint8_t mem_type; // TGSI_MEMORY_TYPE_* 980 }; 981 std::vector<MemoryFile> memoryFiles; 982 983private: 984 int inferSysValDirection(unsigned sn) const; 985 bool scanDeclaration(const struct tgsi_full_declaration *); 986 bool scanInstruction(const struct tgsi_full_instruction *); 987 void scanInstructionSrc(const Instruction& insn, 988 const Instruction::SrcRegister& src, 989 unsigned mask); 990 void scanProperty(const struct tgsi_full_property *); 991 void scanImmediate(const struct tgsi_full_immediate *); 992 993 inline bool isEdgeFlagPassthrough(const Instruction&) const; 994}; 995 996Source::Source(struct nv50_ir_prog_info *prog) : info(prog) 997{ 998 tokens = (const struct tgsi_token *)info->bin.source; 999 1000 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 1001 tgsi_dump(tokens, 0); 1002} 1003 1004Source::~Source() 1005{ 1006 if (insns) 1007 FREE(insns); 1008 1009 if (info->immd.data) 1010 FREE(info->immd.data); 1011 if (info->immd.type) 1012 FREE(info->immd.type); 1013} 1014 1015bool Source::scanSource() 1016{ 1017 unsigned insnCount = 0; 1018 struct tgsi_parse_context parse; 1019 1020 
tgsi_scan_shader(tokens, &scan); 1021 1022 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions * 1023 sizeof(insns[0])); 1024 if (!insns) 1025 return false; 1026 1027 clipVertexOutput = -1; 1028 1029 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); 1030 //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); 1031 images.resize(scan.file_max[TGSI_FILE_IMAGE] + 1); 1032 tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); 1033 memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); 1034 1035 info->immd.bufSize = 0; 1036 1037 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; 1038 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; 1039 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; 1040 1041 if (info->type == PIPE_SHADER_FRAGMENT) { 1042 info->prop.fp.writesDepth = scan.writes_z; 1043 info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase; 1044 } else 1045 if (info->type == PIPE_SHADER_GEOMETRY) { 1046 info->prop.gp.instanceCount = 1; // default value 1047 } 1048 1049 info->io.viewportId = -1; 1050 info->prop.cp.numThreads = 1; 1051 1052 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); 1053 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); 1054 1055 tgsi_parse_init(&parse, tokens); 1056 while (!tgsi_parse_end_of_tokens(&parse)) { 1057 tgsi_parse_token(&parse); 1058 1059 switch (parse.FullToken.Token.Type) { 1060 case TGSI_TOKEN_TYPE_IMMEDIATE: 1061 scanImmediate(&parse.FullToken.FullImmediate); 1062 break; 1063 case TGSI_TOKEN_TYPE_DECLARATION: 1064 scanDeclaration(&parse.FullToken.FullDeclaration); 1065 break; 1066 case TGSI_TOKEN_TYPE_INSTRUCTION: 1067 insns[insnCount++] = parse.FullToken.FullInstruction; 1068 scanInstruction(&parse.FullToken.FullInstruction); 1069 break; 1070 case TGSI_TOKEN_TYPE_PROPERTY: 1071 scanProperty(&parse.FullToken.FullProperty); 1072 break; 1073 default: 1074 INFO("unknown TGSI token type: %d\n", 
parse.FullToken.Token.Type); 1075 break; 1076 } 1077 } 1078 tgsi_parse_free(&parse); 1079 1080 if (indirectTempArrays.size()) { 1081 int tempBase = 0; 1082 for (std::set<int>::const_iterator it = indirectTempArrays.begin(); 1083 it != indirectTempArrays.end(); ++it) { 1084 std::pair<int, int>& info = tempArrayInfo[*it]; 1085 indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first)); 1086 tempBase += info.second; 1087 } 1088 info->bin.tlsSpace += tempBase * 16; 1089 } 1090 1091 if (info->io.genUserClip > 0) { 1092 info->io.clipDistances = info->io.genUserClip; 1093 1094 const unsigned int nOut = (info->io.genUserClip + 3) / 4; 1095 1096 for (unsigned int n = 0; n < nOut; ++n) { 1097 unsigned int i = info->numOutputs++; 1098 info->out[i].id = i; 1099 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; 1100 info->out[i].si = n; 1101 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4); 1102 } 1103 } 1104 1105 return info->assignSlots(info) == 0; 1106} 1107 1108void Source::scanProperty(const struct tgsi_full_property *prop) 1109{ 1110 switch (prop->Property.PropertyName) { 1111 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 1112 info->prop.gp.outputPrim = prop->u[0].Data; 1113 break; 1114 case TGSI_PROPERTY_GS_INPUT_PRIM: 1115 info->prop.gp.inputPrim = prop->u[0].Data; 1116 break; 1117 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 1118 info->prop.gp.maxVertices = prop->u[0].Data; 1119 break; 1120 case TGSI_PROPERTY_GS_INVOCATIONS: 1121 info->prop.gp.instanceCount = prop->u[0].Data; 1122 break; 1123 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 1124 info->prop.fp.separateFragData = true; 1125 break; 1126 case TGSI_PROPERTY_FS_COORD_ORIGIN: 1127 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: 1128 case TGSI_PROPERTY_FS_DEPTH_LAYOUT: 1129 // we don't care 1130 break; 1131 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 1132 info->io.genUserClip = -1; 1133 break; 1134 case TGSI_PROPERTY_TCS_VERTICES_OUT: 1135 info->prop.tp.outputPatchSize = prop->u[0].Data; 1136 break; 1137 case 
TGSI_PROPERTY_TES_PRIM_MODE:
      info->prop.tp.domain = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_TES_SPACING:
      info->prop.tp.partitioning = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
      info->prop.tp.winding = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_TES_POINT_MODE:
      if (prop->u[0].Data)
         info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
      else
         info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
      break;
   case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
   case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
   case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
      // each block dimension multiplies into the running thread count
      info->prop.cp.numThreads *= prop->u[0].Data;
      break;
   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
      info->io.clipDistances = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
      info->io.cullDistances = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_NEXT_SHADER:
      /* Do not need to know the next shader stage. */
      break;
   case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
      info->prop.fp.earlyFragTests = prop->u[0].Data;
      break;
   default:
      INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
      break;
   }
}

/* Append one immediate to info->immd: its four channel values go to
 * immd.data and its data type to immd.type, at the next free index.
 */
void Source::scanImmediate(const struct tgsi_full_immediate *imm)
{
   const unsigned n = info->immd.count++;

   // the arrays are expected to hold scan.immediate_count entries
   assert(n < scan.immediate_count);

   for (int c = 0; c < 4; ++c)
      info->immd.data[n * 4 + c] = imm->u[c].Uint;

   info->immd.type[n] = imm->Immediate.DataType;
}

/* Return the value stored in info->sv[].input for a system value with
 * semantic sn: 1 for INSTANCEID / VERTEXID, 1 for LAYER / PRIMID only in
 * fragment shaders, 0 for everything else.
 */
int Source::inferSysValDirection(unsigned sn) const
{
   switch (sn) {
   case TGSI_SEMANTIC_INSTANCEID:
   case TGSI_SEMANTIC_VERTEXID:
      return 1;
   // with the #if 0 block disabled, LAYER shares the PRIMID result below
   case TGSI_SEMANTIC_LAYER:
#if 0
   case TGSI_SEMANTIC_VIEWPORTINDEX:
      return 0;
#endif
   case TGSI_SEMANTIC_PRIMID:
      return (info->type == PIPE_SHADER_FRAGMENT) ?
1 : 0;
   default:
      return 0;
   }
}

/* Record one TGSI declaration.
 *
 * Registers declared Local (and all ADDRESS registers) are added to 'locals'.
 * Then, depending on the register file, the per-register tables are filled
 * for every index in [Range.First, Range.Last]: info->in / info->out /
 * info->sv, images, textureViews, memoryFiles and tempArrayId.
 *
 * Returns false only when the register file is not handled.
 */
bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
{
   unsigned i, c;
   // semantic defaults used when the declaration carries none
   unsigned sn = TGSI_SEMANTIC_GENERIC;
   unsigned si = 0;
   const unsigned first = decl->Range.First, last = decl->Range.Last;
   const int arrayId = decl->Array.ArrayID;

   if (decl->Declaration.Semantic) {
      sn = decl->Semantic.Name;
      si = decl->Semantic.Index;
   }

   if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
      for (i = first; i <= last; ++i) {
         for (c = 0; c < 4; ++c) {
            locals.insert(
               Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
         }
      }
   }

   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      if (info->type == PIPE_SHADER_VERTEX) {
         // all vertex attributes are equal
         for (i = first; i <= last; ++i) {
            info->in[i].sn = TGSI_SEMANTIC_GENERIC;
            info->in[i].si = i;
         }
      } else {
         // the semantic index advances together with the register index
         for (i = first; i <= last; ++i, ++si) {
            info->in[i].id = i;
            info->in[i].sn = sn;
            info->in[i].si = si;
            if (info->type == PIPE_SHADER_FRAGMENT) {
               // translate interpolation mode
               switch (decl->Interp.Interpolate) {
               case TGSI_INTERPOLATE_CONSTANT:
                  info->in[i].flat = 1;
                  break;
               case TGSI_INTERPOLATE_COLOR:
                  info->in[i].sc = 1;
                  break;
               case TGSI_INTERPOLATE_LINEAR:
                  info->in[i].linear = 1;
                  break;
               default:
                  break;
               }
               // any non-zero interpolation location sets the centroid flag
               if (decl->Interp.Location)
                  info->in[i].centroid = 1;
            }

            if (sn == TGSI_SEMANTIC_PATCH)
               info->in[i].patch = 1;
            if (sn == TGSI_SEMANTIC_PATCH)
               info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
         }
      }
      break;
   case TGSI_FILE_OUTPUT:
      for (i = first; i <= last; ++i, ++si) {
         switch (sn) {
         case TGSI_SEMANTIC_POSITION:
            // fragment shaders record POSITION as their depth output; other
            // stages use it as clip vertex if none has been chosen yet
            if (info->type == PIPE_SHADER_FRAGMENT)
               info->io.fragDepth = i;
            else
            if
(clipVertexOutput < 0)
               clipVertexOutput = i;
            break;
         case TGSI_SEMANTIC_COLOR:
            if (info->type == PIPE_SHADER_FRAGMENT)
               info->prop.fp.numColourResults++;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = i;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            // unconditional: replaces any output index stored here earlier
            clipVertexOutput = i;
            break;
         case TGSI_SEMANTIC_CLIPDIST:
            // the shader declares clip distances itself, so none are generated
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = i;
            break;
         case TGSI_SEMANTIC_VIEWPORT_INDEX:
            info->io.viewportId = i;
            break;
         case TGSI_SEMANTIC_PATCH:
            info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
            /* fallthrough */
         case TGSI_SEMANTIC_TESSOUTER:
         case TGSI_SEMANTIC_TESSINNER:
            info->out[i].patch = 1;
            break;
         default:
            break;
         }
         info->out[i].id = i;
         info->out[i].sn = sn;
         info->out[i].si = si;
      }
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      // flags that depend only on the semantic, set once per declaration
      switch (sn) {
      case TGSI_SEMANTIC_INSTANCEID:
         info->io.instanceId = first;
         break;
      case TGSI_SEMANTIC_VERTEXID:
         info->io.vertexId = first;
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
      case TGSI_SEMANTIC_BASEINSTANCE:
      case TGSI_SEMANTIC_DRAWID:
         info->prop.vp.usesDrawParameters = true;
         break;
      case TGSI_SEMANTIC_SAMPLEID:
      case TGSI_SEMANTIC_SAMPLEPOS:
         info->prop.fp.persampleInvocation = true;
         break;
      case TGSI_SEMANTIC_SAMPLEMASK:
         info->prop.fp.usesSampleMaskIn = true;
         break;
      default:
         break;
      }
      // per-register entries of the system value table
      for (i = first; i <= last; ++i, ++si) {
         info->sv[i].sn = sn;
         info->sv[i].si = si;
         info->sv[i].input = inferSysValDirection(sn);

         switch (sn) {
         case TGSI_SEMANTIC_TESSOUTER:
         case TGSI_SEMANTIC_TESSINNER:
            info->sv[i].patch = 1;
            break;
         }
      }
      break;
/*
   case TGSI_FILE_RESOURCE:
      for (i = first; i <= last; ++i) {
         resources[i].target =
decl->Resource.Resource;
         resources[i].raw = decl->Resource.Raw;
         resources[i].slot = i;
      }
      break;
*/
   case TGSI_FILE_IMAGE:
      for (i = first; i <= last; ++i) {
         images[i].target = decl->Image.Resource;
         images[i].raw = decl->Image.Raw;
         images[i].format = decl->Image.Format;
         images[i].slot = i;
      }
      break;
   case TGSI_FILE_SAMPLER_VIEW:
      for (i = first; i <= last; ++i)
         textureViews[i].target = decl->SamplerView.Resource;
      break;
   case TGSI_FILE_MEMORY:
      for (i = first; i <= last; ++i)
         memoryFiles[i].mem_type = decl->Declaration.MemType;
      break;
   // NOTE(review): TGSI_FILE_NULL shares the TEMPORARY handling below --
   // confirm this is intended.
   case TGSI_FILE_NULL:
   case TGSI_FILE_TEMPORARY:
      for (i = first; i <= last; ++i)
         tempArrayId[i] = arrayId;
      // remember (first register, register count) for each declared array
      if (arrayId)
         tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
                                                first, last - first + 1)));
      break;
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_CONSTANT:
   case TGSI_FILE_IMMEDIATE:
   case TGSI_FILE_PREDICATE:
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_BUFFER:
      break;
   default:
      ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
      return false;
   }
   return true;
}

/* True when insn is a MOV whose first destination has the index stored in
 * info->io.edgeFlagOut and whose first source is an INPUT register. The
 * destination register file is not checked here.
 */
inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
{
   return insn.getOpcode() == TGSI_OPCODE_MOV &&
      insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
      insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
}

/* Account for one source operand (or texture offset) of a scanned
 * instruction.
 *
 * insn: the instruction the operand belongs to
 * src:  the operand
 * mask: bit c set when the instruction reads component c of the operand
 *       (before swizzling)
 *
 * Notes indirectly addressed temporary arrays, BUFFER / IMAGE / global MEMORY
 * accesses, outputs that are read back, and the input components in use.
 */
void Source::scanInstructionSrc(const Instruction& insn,
                                const Instruction::SrcRegister& src,
                                unsigned mask)
{
   if (src.getFile() == TGSI_FILE_TEMPORARY) {
      if (src.isIndirect(0))
         indirectTempArrays.insert(src.getArrayId());
   } else
   if (src.getFile() == TGSI_FILE_BUFFER ||
       src.getFile() == TGSI_FILE_IMAGE ||
       (src.getFile() == TGSI_FILE_MEMORY &&
        memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
      // 0x1 for LOAD, 0x2 for every other opcode (presumably read / write)
      info->io.globalAccess |= (insn.getOpcode() ==
TGSI_OPCODE_LOAD) ?
         0x1 : 0x2;
   } else
   if (src.getFile() == TGSI_FILE_OUTPUT) {
      if (src.isIndirect(0)) {
         // We don't know which one is accessed, just mark everything for
         // reading. This is an extremely unlikely occurrence.
         for (unsigned i = 0; i < info->numOutputs; ++i)
            info->out[i].oread = 1;
      } else {
         info->out[src.getIndex(0)].oread = 1;
      }
   }
   // everything below only concerns the component usage of inputs
   if (src.getFile() != TGSI_FILE_INPUT)
      return;

   if (src.isIndirect(0)) {
      // any input may be addressed: mark all components of all inputs
      for (unsigned i = 0; i < info->numInputs; ++i)
         info->in[i].mask = 0xf;
   } else {
      const int i = src.getIndex(0);
      for (unsigned c = 0; c < 4; ++c) {
         if (!(mask & (1 << c)))
            continue;
         // record the component selected by the swizzle; swizzle values
         // above TGSI_SWIZZLE_W are not recorded
         int k = src.getSwizzle(c);
         if (k <= TGSI_SWIZZLE_W)
            info->in[i].mask |= 1 << k;
      }
      // restrict the usage mask for semantics limited to one or two components
      switch (info->in[i].sn) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_PRIMID:
      case TGSI_SEMANTIC_FOG:
         info->in[i].mask &= 0x1;
         break;
      case TGSI_SEMANTIC_PCOORD:
         info->in[i].mask &= 0x3;
         break;
      default:
         break;
      }
   }
}

/* Gather per-instruction information: barrier / framebuffer-fetch usage, the
 * write mask of outputs, indirectly addressed temporary arrays, global memory
 * writes, and (through scanInstructionSrc) the usage of every source operand
 * and texture offset. Only the first destination is examined. Always returns
 * true.
 */
bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
{
   Instruction insn(inst);

   if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
      info->numBarriers = 1;

   if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
      info->prop.fp.readsFramebuffer = true;

   if (insn.dstCount()) {
      Instruction::DstRegister dst = insn.getDst(0);

      if (dst.getFile() == TGSI_FILE_OUTPUT) {
         // an indirect write may hit any output: mark all of them fully written
         if (dst.isIndirect(0))
            for (unsigned i = 0; i < info->numOutputs; ++i)
               info->out[i].mask = 0xf;
         else
            info->out[dst.getIndex(0)].mask |= dst.getMask();

         // these semantics only keep their first component
         if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
            info->out[dst.getIndex(0)].mask &= 1;

         // remember which input feeds a plain MOV into the edge flag output
         if (isEdgeFlagPassthrough(insn))
            info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
      } else
      if (dst.getFile() == TGSI_FILE_TEMPORARY) {
         if (dst.isIndirect(0))
            indirectTempArrays.insert(dst.getArrayId());
      } else
      if (dst.getFile() == TGSI_FILE_BUFFER ||
          dst.getFile() == TGSI_FILE_IMAGE ||
          (dst.getFile() == TGSI_FILE_MEMORY &&
           memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
         // same bit scanInstructionSrc sets for non-LOAD opcodes
         info->io.globalAccess |= 0x2;
      }
   }

   for (unsigned s = 0; s < insn.srcCount(); ++s)
      scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));

   for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
      scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());

   return true;
}

/* Return the texture target for source operand s: the target declared for the
 * sampler view when the operand is a SAMPLER_VIEW register, otherwise the
 * target stored in the instruction's texture token.
 */
nv50_ir::TexInstruction::Target
Instruction::getTexture(const tgsi::Source *code, int s) const
{
   // XXX: indirect access
   unsigned int r;

   switch (getSrc(s).getFile()) {
/*
   case TGSI_FILE_RESOURCE:
      r = getSrc(s).getIndex(0);
      return translateTexture(code->resources.at(r).target);
*/
   case TGSI_FILE_SAMPLER_VIEW:
      r = getSrc(s).getIndex(0);
      return translateTexture(code->textureViews.at(r).target);
   default:
      return translateTexture(insn->Texture.Texture);
   }
}

} // namespace tgsi

namespace {

using namespace nv50_ir;

/* State and helpers for turning the instructions of a scanned tgsi::Source
 * into nv50_ir instructions, built on the BuildUtil emit helpers.
 */
class Converter : public BuildUtil
{
public:
   Converter(Program *, const tgsi::Source *);
   ~Converter();

   bool run();

private:
   // a function together with the value map used while emitting into it
   struct Subroutine
   {
      Subroutine(Function *f) : f(f) { }
      Function *f;
      ValueMap values;
   };

   Value *shiftAddress(Value *);
   Value *getVertexBase(int s);
   Value *getOutputBase(int s);
   DataArray *getArrayForFile(unsigned file, int idx);
   Value *fetchSrc(int
s, int c);
   Value *acquireDst(int d, int c);
   void storeDst(int d, int c, Value *);

   // variants working on an explicit register and an already computed
   // address value instead of an operand index of the current instruction
   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                 Value *val, Value *ptr);

   void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
   Value *applySrcMod(Value *, int s, int c);

   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

   bool handleInstruction(const struct tgsi_full_instruction *);
   void exportOutputs();
   inline Subroutine *getSubroutine(unsigned ip);
   inline Subroutine *getSubroutine(Function *);
   inline bool isEndOfSubroutine(uint ip);

   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
   void handleTXF(Value *dst0[4], int R, int L_M);
   void handleTXQ(Value *dst0[4], enum TexQuery, int R);
   void handleFBFETCH(Value *dst0[4]);
   void handleLIT(Value *dst0[4]);
   void handleUserClipPlanes();

   // Symbol *getResourceBase(int r);
   void getImageCoords(std::vector<Value *>&, int r, int s);

   void handleLOAD(Value *dst0[4]);
   void handleSTORE();
   void handleATOM(Value *dst0[4], DataType, uint16_t subOp);

   void handleINTERP(Value *dst0[4]);

   uint8_t translateInterpMode(const struct nv50_ir_varying *var,
                               operation& op);
   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

   Value *buildDot(int dim);

   class BindArgumentsPass :
public Pass {
   public:
      BindArgumentsPass(Converter &conv) : conv(conv) { }

   private:
      Converter &conv;
      Subroutine *sub;

      inline const Location *getValueLocation(Subroutine *, Value *);

      template<typename T> inline void
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
                     T (Function::*proto));

      template<typename T> inline void
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
                      T (Function::*proto));

   protected:
      bool visit(Function *);
      // the basic block overload does no work and returns false
      bool visit(BasicBlock *bb) { return false; }
   };

private:
   const tgsi::Source *code;
   const struct nv50_ir_prog_info *info;

   struct {
      std::map<unsigned, Subroutine> map;
      Subroutine *cur;
   } sub;

   uint ip; // instruction pointer

   tgsi::Instruction tgsi;

   DataType dstTy;
   DataType srcTy;

   DataArray tData; // TGSI_FILE_TEMPORARY
   DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
   DataArray aData; // TGSI_FILE_ADDRESS
   DataArray pData; // TGSI_FILE_PREDICATE
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)

   Value *zero;
   Value *fragCoord[4];
   Value *clipVtx[4];

   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
   uint8_t vtxBaseValid;

   Value *outBase; // base address of vertex out patch (for TCP)

   Stack condBBs;  // fork BB, then else clause BB
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
   Stack loopBBs;  // loop headers
   Stack breakBBs; // end of / after loop

   Value *viewport;
};

/* Build the Symbol for component c of a source register. The swizzled
 * component is used both as makeSym's component argument and in the
 * address, which is index * 16 + component * 4.
 */
Symbol *
Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
{
   const int swz = src.getSwizzle(c);

   /* TODO: Use Array ID when it's available for the index */
   return makeSym(src.getFile(),
                  src.is2D() ?
src.getIndex(1) : 0,
                  src.getIndex(0), swz,
                  src.getIndex(0) * 16 + swz * 4);
}

/* Build the Symbol for component c of a destination register; same layout as
 * srcToSym but without a swizzle.
 */
Symbol *
Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
{
   /* TODO: Use Array ID when it's available for the index */
   return makeSym(dst.getFile(),
                  dst.is2D() ? dst.getIndex(1) : 0,
                  dst.getIndex(0), c,
                  dst.getIndex(0) * 16 + c * 4);
}

/* Create a Symbol for a TGSI register component.
 *
 * tgsiFile: TGSI register file, translated with tgsi::translateFile()
 * fileIdx:  stored as the symbol's fileIndex; for TGSI_FILE_MEMORY it also
 *           selects the entry of code->memoryFiles that decides the file
 * idx:      register index; when negative the plain address is always used
 * c:        component, used for the input / output slot and system value
 * address:  offset used whenever no slot or system value lookup applies
 */
Symbol *
Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
{
   Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));

   sym->reg.fileIndex = fileIdx;

   if (tgsiFile == TGSI_FILE_MEMORY) {
      // the declared memory type overrides the translated file
      switch (code->memoryFiles[fileIdx].mem_type) {
      case TGSI_MEMORY_TYPE_GLOBAL:
         /* No-op this is the default for TGSI_FILE_MEMORY */
         sym->setFile(FILE_MEMORY_GLOBAL);
         break;
      case TGSI_MEMORY_TYPE_SHARED:
         sym->setFile(FILE_MEMORY_SHARED);
         break;
      case TGSI_MEMORY_TYPE_INPUT:
         assert(prog->getType() == Program::TYPE_COMPUTE);
         // idx must be -1 so that the slot lookup below is skipped
         assert(idx == -1);
         sym->setFile(FILE_SHADER_INPUT);
         address += info->prop.cp.inputOffset;
         break;
      default:
         assert(0); /* TODO: Add support for global and private memory */
      }
   }

   if (idx >= 0) {
      // inputs and outputs use their assigned slot, system values their
      // translated semantic; everything else keeps the address
      if (sym->reg.file == FILE_SHADER_INPUT)
         sym->setOffset(info->in[idx].slot[c] * 4);
      else
      if (sym->reg.file == FILE_SHADER_OUTPUT)
         sym->setOffset(info->out[idx].slot[c] * 4);
      else
      if (sym->reg.file == FILE_SYSTEM_VALUE)
         sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
      else
         sym->setOffset(address);
   } else {
      sym->setOffset(address);
   }
   return sym;
}

/* Derive the interpolation mode bits for a varying and the opcode to use.
 *
 * The base mode is chosen in the order flat, linear, sc, with perspective as
 * the default; NV50_IR_INTERP_CENTROID is OR'ed in for centroid varyings.
 * op is set to OP_PINTERP for the perspective and sc modes and to OP_LINTERP
 * otherwise.
 */
uint8_t
Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;

   if (var->flat)
      mode = NV50_IR_INTERP_FLAT;
   else
   if (var->linear)
      mode = NV50_IR_INTERP_LINEAR;
   else
   if (var->sc)
mode = NV50_IR_INTERP_SC;

   // the opcode is chosen before the centroid bit is added to the mode
   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
      ? OP_PINTERP : OP_LINTERP;

   if (var->centroid)
      mode |= NV50_IR_INTERP_CENTROID;

   return mode;
}

/* Emit the interpolation instruction for component c of an input register and
 * return its result value.
 *
 * ptr: optional address for indirect access; when set, the interpolation mode
 *      of input 0 is used because the accessed input is unknown.
 */
Value *
Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   operation op;

   // XXX: no way to know interpolation mode if we don't know what's accessed
   const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
                                                      src.getIndex(0)], op);

   Instruction *insn = new_Instruction(func, op, TYPE_F32);

   insn->setDef(0, getScratch());
   insn->setSrc(0, srcToSym(src, c));
   // OP_PINTERP takes fragCoord[3] as its second source
   if (op == OP_PINTERP)
      insn->setSrc(1, fragCoord[3]);
   if (ptr)
      insn->setIndirect(0, 0, ptr);

   insn->setInterpolate(mode);

   bb->insertTail(insn);
   return insn->getDef(0);
}

/* Apply the modifiers of source operand s, component c, to val: an OP_ABS for
 * the ABS modifier followed by an OP_NEG for the NEG modifier. Returns val
 * unchanged when neither is set.
 */
Value *
Converter::applySrcMod(Value *val, int s, int c)
{
   Modifier m = tgsi.getSrc(s).getMod(c);
   DataType ty = tgsi.inferSrcType();

   if (m & Modifier(NV50_IR_MOD_ABS))
      val = mkOp1v(OP_ABS, ty, getScratch(), val);

   if (m & Modifier(NV50_IR_MOD_NEG))
      val = mkOp1v(OP_NEG, ty, getScratch(), val);

   return val;
}

/* Return the OP_PFETCH result for the second-dimension index of source
 * operand s. The value is computed once per operand slot and cached in
 * vtxBase[s]; bit s of vtxBaseValid marks the cache entry as valid.
 */
Value *
Converter::getVertexBase(int s)
{
   assert(s < 5);
   if (!(vtxBaseValid & (1 << s))) {
      const int index = tgsi.getSrc(s).getIndex(1);
      Value *rel = NULL;
      if (tgsi.getSrc(s).isIndirect(1))
         rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
      vtxBaseValid |= 1 << s;
      vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
                          mkImm(index), rel);
   }
   return vtxBase[s];
}

/* Return outBase plus the second-dimension index (and its indirect part, if
 * any) of source operand s. Uses the same vtxBase / vtxBaseValid cache as
 * getVertexBase.
 */
Value *
Converter::getOutputBase(int s)
{
   assert(s < 5);
   if (!(vtxBaseValid & (1 << s))) {
      Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
      if (tgsi.getSrc(s).isIndirect(1))
         offset = mkOp2v(OP_ADD,
TYPE_U32, getSSA(),
                         fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
                         offset);
      vtxBaseValid |= 1 << s;
      vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
   }
   return vtxBase[s];
}

/* Fetch component c of source operand s of the current instruction.
 *
 * Resolves the first-dimension indirect address and, for 2D OUTPUT / INPUT /
 * CONSTANT operands, the second-dimension relative value, then applies the
 * operand's source modifiers to the loaded value.
 */
Value *
Converter::fetchSrc(int s, int c)
{
   Value *res;
   Value *ptr = NULL, *dimRel = NULL;

   tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);

   if (src.isIndirect(0))
      ptr = fetchSrc(src.getIndirect(0), 0, NULL);

   if (src.is2D()) {
      switch (src.getFile()) {
      case TGSI_FILE_OUTPUT:
         dimRel = getOutputBase(s);
         break;
      case TGSI_FILE_INPUT:
         dimRel = getVertexBase(s);
         break;
      case TGSI_FILE_CONSTANT:
         // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
         if (src.isIndirect(1))
            dimRel = fetchSrc(src.getIndirect(1), 0, 0);
         break;
      default:
         break;
      }
   }

   res = fetchSrc(src, c, ptr);

   // attach the second-dimension value to the instruction that produced res
   if (dimRel)
      res->getInsn()->setIndirect(0, 1, dimRel);

   return applySrcMod(res, s, c);
}

/* Return the DataArray backing a TGSI register file. For temporaries idx
 * selects between tData (idx == 0) and lData (any other value). OUTPUT is
 * only expected in fragment programs. Unhandled files assert and yield NULL.
 */
Converter::DataArray *
Converter::getArrayForFile(unsigned file, int idx)
{
   switch (file) {
   case TGSI_FILE_TEMPORARY:
      return idx == 0 ?
&tData : &lData;
   case TGSI_FILE_PREDICATE:
      return &pData;
   case TGSI_FILE_ADDRESS:
      return &aData;
   case TGSI_FILE_OUTPUT:
      assert(prog->getType() == Program::TYPE_FRAGMENT);
      return &oData;
   default:
      assert(!"invalid/unhandled TGSI source file");
      return NULL;
   }
}

/* Return index shifted left by 4 (i.e. multiplied by 16) as a new address
 * value, or NULL when index is NULL.
 */
Value *
Converter::shiftAddress(Value *index)
{
   if (!index)
      return NULL;
   return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
}

/* Redirect a temporary register that belongs to an indirectly addressed
 * array: if arrayId has an entry in code->indirectTempOffsets, idx2d is set
 * to 1 and the stored offset is added to idx. Otherwise both stay untouched.
 */
void
Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
{
   std::map<int, int>::const_iterator it =
      code->indirectTempOffsets.find(arrayId);
   if (it == code->indirectTempOffsets.end())
      return;

   // a non-zero idx2d makes getArrayForFile pick lData for temporaries
   idx2d = 1;
   idx += it->second;
}

/* Load component c of the given source register and return the value.
 *
 * ptr: optional indirect address value (NULL for direct access); it must be
 *      NULL for IMMEDIATE and SYSTEM_VALUE registers.
 *
 * Source modifiers are not applied here.
 */
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   int idx2d = src.is2D() ? src.getIndex(1) : 0;
   int idx = src.getIndex(0);
   const int swz = src.getSwizzle(c);
   Instruction *ld;

   switch (src.getFile()) {
   case TGSI_FILE_IMMEDIATE:
      assert(!ptr);
      return loadImm(NULL, info->immd.data[idx * 4 + swz]);
   case TGSI_FILE_CONSTANT:
      return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
   case TGSI_FILE_INPUT:
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         // don't load masked inputs, won't be assigned a slot
         if (!ptr && !(info->in[idx].mask & (1 << swz)))
            return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
         return interpolate(src, c, shiftAddress(ptr));
      } else
      if (prog->getType() == Program::TYPE_GEOMETRY) {
         // a direct PRIMID input read becomes a system value read
         if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
            return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
         // XXX: This is going to be a problem with scalar arrays, i.e. when
         // we cannot assume that the address is given in units of vec4.
//
         // nv50 and nvc0 need different things here, so let the lowering
         // passes decide what to do with the address
         if (ptr)
            return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
      }
      ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
      ld->perPatch = info->in[idx].patch;
      return ld->getDef(0);
   case TGSI_FILE_OUTPUT:
      // reading outputs back is only expected in tessellation control programs
      assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
      ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
      ld->perPatch = info->out[idx].patch;
      return ld->getDef(0);
   case TGSI_FILE_SYSTEM_VALUE:
      assert(!ptr);
      ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
      ld->perPatch = info->sv[idx].patch;
      return ld->getDef(0);
   case TGSI_FILE_TEMPORARY: {
      // use the array id of the operand, or the one declared for the register
      int arrayid = src.getArrayId();
      if (!arrayid)
         arrayid = code->tempArrayId[idx];
      adjustTempIndex(arrayid, idx, idx2d);
   }
      /* fallthrough */
   default:
      return getArrayForFile(src.getFile(), idx2d)->load(
         sub.cur->values, idx, swz, shiftAddress(ptr));
   }
}

/* Return the value an instruction should write for channel c of destination
 * d of the current instruction:
 *  - NULL when the channel is masked or the destination is a BUFFER, MEMORY
 *    or IMAGE register,
 *  - a scratch value for indirect destinations, system values and outputs of
 *    non-fragment programs,
 *  - otherwise the value acquired from the DataArray of the register file.
 */
Value *
Converter::acquireDst(int d, int c)
{
   const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
   const unsigned f = dst.getFile();
   int idx = dst.getIndex(0);
   int idx2d = dst.is2D() ?
dst.getIndex(1) : 0;

   if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
       f == TGSI_FILE_IMAGE)
      return NULL;

   if (dst.isIndirect(0) ||
       f == TGSI_FILE_SYSTEM_VALUE ||
       (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
      return getScratch();

   if (f == TGSI_FILE_TEMPORARY) {
      // use the array id of the operand, or the one declared for the register
      int arrayid = dst.getArrayId();
      if (!arrayid)
         arrayid = code->tempArrayId[idx];
      adjustTempIndex(arrayid, idx, idx2d);
   }

   return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
}

/* Store val to channel c of destination d of the current instruction.
 *
 * Applies saturation if the instruction requests it, computes the address for
 * an indirect destination, and copies the value to clipVtx[c] when user clip
 * planes are generated and the directly addressed output is
 * code->clipVertexOutput.
 */
void
Converter::storeDst(int d, int c, Value *val)
{
   const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);

   if (tgsi.getSaturate()) {
      // val is both the destination and the source of the OP_SAT
      mkOp1(OP_SAT, dstTy, val, val);
   }

   Value *ptr = NULL;
   if (dst.isIndirect(0))
      ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));

   if (info->io.genUserClip > 0 &&
       dst.getFile() == TGSI_FILE_OUTPUT &&
       !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
      // the stored value is then clipVtx[c] rather than the original val
      mkMov(clipVtx[c], val);
      val = clipVtx[c];
   }

   storeDst(dst, c, val, ptr);
}

/* Store val to channel c of the given destination register.
 *
 * ptr: optional address value for indirect access; must be NULL for
 *      SYSTEM_VALUE destinations.
 *
 * System values are written with OP_WRSV and outputs of non-fragment programs
 * with OP_EXPORT; TEMPORARY, PREDICATE, ADDRESS and remaining OUTPUT
 * registers go through their DataArray. Any other file asserts.
 */
void
Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
                    Value *val, Value *ptr)
{
   const unsigned f = dst.getFile();
   int idx = dst.getIndex(0);
   int idx2d = dst.is2D() ?
dst.getIndex(1) : 0;

   if (f == TGSI_FILE_SYSTEM_VALUE) {
      assert(!ptr);
      mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
   } else
   if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {

      // direct writes to components outside the output's mask are dropped
      if (ptr || (info->out[idx].mask & (1 << c))) {
         /* Save the viewport index into a scratch register so that it can be
            exported at EMIT time */
         if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
             viewport != NULL)
            mkOp1(OP_MOV, TYPE_U32, viewport, val);
         else
            mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
               info->out[idx].patch;
      }
   } else
   if (f == TGSI_FILE_TEMPORARY ||
       f == TGSI_FILE_PREDICATE ||
       f == TGSI_FILE_ADDRESS ||
       f == TGSI_FILE_OUTPUT) {
      if (f == TGSI_FILE_TEMPORARY) {
         // use the array id of the operand, or the one declared for the register
         int arrayid = dst.getArrayId();
         if (!arrayid)
            arrayid = code->tempArrayId[idx];
         adjustTempIndex(arrayid, idx, idx2d);
      }

      getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
   } else {
      assert(!"invalid dst file");
   }
}

/* Loop header: runs the following statement with chan set to each of the
 * channels 0..3 of destination d of inst that is not masked.
 */
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
   for (chan = 0; chan < 4; ++chan)                 \
      if (!inst.getDst(d).isMasked(chan))

/* Emit the dot product of the first dim components of source operands 0 and 1
 * of the current instruction (one OP_MUL followed by dim - 1 OP_MADs) and
 * return the accumulator value. dim must be positive.
 */
Value *
Converter::buildDot(int dim)
{
   assert(dim > 0);

   Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
   Value *dotp = getScratch();

   mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);

   for (int c = 1; c < dim; ++c) {
      src0 = fetchSrc(0, c);
      src1 = fetchSrc(1, c);
      mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
   }
   return dotp;
}

/* Insert a fixed OP_JOIN at the head of conv and an OP_JOINAT targeting conv
 * before the exit instruction of fork. fork must not have a joinAt yet.
 */
void
Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
{
   FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
   join->fixed = 1;
   conv->insertHead(join);

   assert(!fork->joinAt);
   fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
fork->insertBefore(fork->getExit(), fork->joinAt);
}

/* Set the texture target and the resource / sampler indices of tex.
 *
 * s: index of the next free source of tex; advanced for every indirect
 *    resource / sampler index that gets appended as a source
 * R: operand index of the resource
 * S: operand index of the sampler, negative if there is none
 */
void
Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
{
   unsigned rIdx = 0, sIdx = 0;

   if (R >= 0)
      rIdx = tgsi.getSrc(R).getIndex(0);
   if (S >= 0)
      sIdx = tgsi.getSrc(S).getIndex(0);

   tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);

   // NOTE(review): unlike the index lookup above, R is used here without a
   // R >= 0 check -- presumably callers always pass a valid R; confirm.
   if (tgsi.getSrc(R).isIndirect(0)) {
      tex->tex.rIndirectSrc = s;
      tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
   }
   if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
      tex->tex.sIndirectSrc = s;
      tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
   }
}

/* Emit an OP_TXQ for the given query on resource operand R. Every non-NULL
 * entry of dst0 becomes a def and sets its bit in the instruction's mask.
 */
void
Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
{
   TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
   tex->tex.query = query;
   unsigned int c, d;

   for (d = 0, c = 0; c < 4; ++c) {
      if (!dst0[c])
         continue;
      tex->tex.mask |= 1 << c;
      tex->setDef(d++, dst0[c]);
   }
   // source 0 is the mip level for TXQ_DIMS and zero for other queries
   if (query == TXQ_DIMS)
      tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
   else
      tex->setSrc((c = 0), zero);

   // c was reset to 0 above, so indirect indices start at source 1
   setTexRS(tex, ++c, R, -1);

   bb->insertTail(tex);
}

/* Compute projected texture coordinates: for every component selected by
 * mask, dst[c] receives src[c] divided by component 3 of source operand 0.
 *
 * Components produced by an OP_PINTERP are re-interpolated with the
 * reciprocal as second source; the remaining ones are multiplied by it.
 */
void
Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
{
   Value *proj = fetchSrc(0, 3);
   Instruction *insn = proj->getUniqueInsn();
   int c;

   // a perspective-interpolated divisor is re-fetched with linear interpolation
   if (insn->op == OP_PINTERP) {
      bb->insertTail(insn = cloneForward(func, insn));
      insn->op = OP_LINTERP;
      insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
      insn->setSrc(1, NULL);
      proj = insn->getDef(0);
   }
   proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);

   for (c = 0; c < 4; ++c) {
      if (!(mask & (1 << c)))
         continue;
      if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
         continue;
      // handled here, so remove it from the multiply pass further down
      mask &= ~(1 << c);

      bb->insertTail(insn = cloneForward(func, insn));
insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
      insn->setSrc(1, proj);
      dst[c] = insn->getDef(0);
   }
   if (!mask)
      return;

   // remaining components: multiply by a freshly computed reciprocal
   proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));

   for (c = 0; c < 4; ++c)
      if (mask & (1 << c))
         dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
}

// order of nv50 ir sources: x y z layer lod/bias shadow
// order of TGSI TEX sources: x y z layer shadow lod/bias
//  lowering will finally set the hw specific order (like array first on nvc0)
//
// Emits the texture instruction for the current TGSI opcode.
//  dst:      one value per result component, NULL for unwanted components
//  R, S:     operand indices of resource and sampler (see setTexRS)
//  L, C:     location of lod/bias and of the shadow comparator, encoded as
//            (operand << 4) | component; C == 0x0f requests a guess
//  Dx, Dy:   first component of the derivatives, same encoding (OP_TXD only)
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
   Value *arg[4], *src[8];
   Value *lod = NULL, *shd = NULL;
   unsigned int s, c, d;
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());

   TexInstruction::Target tgt = tgsi.getTexture(code, R);

   for (s = 0; s < tgt.getArgCount(); ++s)
      arg[s] = src[s] = fetchSrc(0, s);

   if (texi->op == OP_TXL || texi->op == OP_TXB)
      lod = fetchSrc(L >> 4, L & 3);

   if (C == 0x0f)
      C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src

   // TG4 on shadow cube arrays takes the comparator from operand 1
   if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
       tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
      shd = fetchSrc(1, 0);
   else if (tgt.isShadow())
      shd = fetchSrc(C >> 4, C & 3);

   if (texi->op == OP_TXD) {
      for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
         texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
         texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
      }
   }

   // cube textures don't care about projection value, it's divided out
   if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
      unsigned int n = tgt.getDim();
      // the comparator is projected along with the coordinates
      if (shd) {
         arg[n] = shd;
         ++n;
         assert(tgt.getDim() == tgt.getArgCount());
      }
      loadProjTexCoords(src, arg, (1 << n) - 1);
      if (shd)
         shd = src[n - 1];
   }

   for (c = 0, d =
0; c < 4; ++c) {
      if (dst[c]) {
         texi->setDef(d++, dst[c]);
         texi->tex.mask |= 1 << c;
      } else {
         // NOTE: maybe hook up def too, for CSE
      }
   }
   // sources: coordinates first, then lod/bias, then the shadow comparator
   for (s = 0; s < tgt.getArgCount(); ++s)
      texi->setSrc(s, src[s]);
   if (lod)
      texi->setSrc(s++, lod);
   if (shd)
      texi->setSrc(s++, shd);

   setTexRS(texi, s, R, S);

   if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
      texi->tex.levelZero = true;
   // TEX / TEX2 / TXP outside fragment programs are marked levelZero
   if (prog->getType() != Program::TYPE_FRAGMENT &&
       (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
        tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
        tgsi.getOpcode() == TGSI_OPCODE_TXP))
      texi->tex.levelZero = true;
   if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
      texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);

   // three components are fetched for every texture offset
   texi->tex.useOffsets = tgsi.getNumTexOffsets();
   for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
      for (c = 0; c < 3; ++c) {
         texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
         texi->offset[s][c].setInsn(texi);
      }
   }

   bb->insertTail(texi);
}

// 1st source: xyz = coordinates, w = lod/sample
// 2nd source: offset
//
// Emits the texel fetch for the current TGSI opcode.
//  dst:  one value per result component, NULL for unwanted components
//  R:    operand index of the resource
//  L_M:  location of the lod or sample index, encoded as
//        (operand << 4) | component
void
Converter::handleTXF(Value *dst[4], int R, int L_M)
{
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
   int ms;
   unsigned int c, d, s;

   texi->tex.target = tgsi.getTexture(code, R);

   ms = texi->tex.target.isMS() ?
1 : 0; 2264 texi->tex.levelZero = ms; /* MS textures don't have mip-maps */ 2265 2266 for (c = 0, d = 0; c < 4; ++c) { 2267 if (dst[c]) { 2268 texi->setDef(d++, dst[c]); 2269 texi->tex.mask |= 1 << c; 2270 } 2271 } 2272 for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) 2273 texi->setSrc(c, fetchSrc(0, c)); 2274 texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms 2275 2276 setTexRS(texi, c, R, -1); 2277 2278 texi->tex.useOffsets = tgsi.getNumTexOffsets(); 2279 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { 2280 for (c = 0; c < 3; ++c) { 2281 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); 2282 texi->offset[s][c].setInsn(texi); 2283 } 2284 } 2285 2286 bb->insertTail(texi); 2287} 2288 2289void 2290Converter::handleFBFETCH(Value *dst[4]) 2291{ 2292 TexInstruction *texi = new_TexInstruction(func, OP_TXF); 2293 unsigned int c, d; 2294 2295 texi->tex.target = TEX_TARGET_2D_MS_ARRAY; 2296 texi->tex.levelZero = 1; 2297 texi->tex.useOffsets = 0; 2298 2299 for (c = 0, d = 0; c < 4; ++c) { 2300 if (dst[c]) { 2301 texi->setDef(d++, dst[c]); 2302 texi->tex.mask |= 1 << c; 2303 } 2304 } 2305 2306 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0)); 2307 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1)); 2308 Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0)); 2309 Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0)); 2310 2311 mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z; 2312 mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z; 2313 texi->setSrc(0, x); 2314 texi->setSrc(1, y); 2315 texi->setSrc(2, z); 2316 texi->setSrc(3, ms); 2317 2318 texi->tex.r = texi->tex.s = -1; 2319 2320 bb->insertTail(texi); 2321} 2322 2323void 2324Converter::handleLIT(Value *dst0[4]) 2325{ 2326 Value *val0 = NULL; 2327 unsigned int mask = tgsi.getDst(0).getMask(); 2328 2329 if (mask & (1 << 0)) 2330 loadImm(dst0[0], 1.0f); 2331 2332 if (mask & (1 << 
3)) 2333 loadImm(dst0[3], 1.0f); 2334 2335 if (mask & (3 << 1)) { 2336 val0 = getScratch(); 2337 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero); 2338 if (mask & (1 << 1)) 2339 mkMov(dst0[1], val0); 2340 } 2341 2342 if (mask & (1 << 2)) { 2343 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3); 2344 Value *val1 = getScratch(), *val3 = getScratch(); 2345 2346 Value *pos128 = loadImm(NULL, +127.999999f); 2347 Value *neg128 = loadImm(NULL, -127.999999f); 2348 2349 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero); 2350 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128); 2351 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); 2352 mkOp2(OP_POW, TYPE_F32, val3, val1, val3); 2353 2354 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0); 2355 } 2356} 2357 2358/* Keep this around for now as reference when adding img support 2359static inline bool 2360isResourceSpecial(const int r) 2361{ 2362 return (r == TGSI_RESOURCE_GLOBAL || 2363 r == TGSI_RESOURCE_LOCAL || 2364 r == TGSI_RESOURCE_PRIVATE || 2365 r == TGSI_RESOURCE_INPUT); 2366} 2367 2368static inline bool 2369isResourceRaw(const tgsi::Source *code, const int r) 2370{ 2371 return isResourceSpecial(r) || code->resources[r].raw; 2372} 2373 2374static inline nv50_ir::TexTarget 2375getResourceTarget(const tgsi::Source *code, int r) 2376{ 2377 if (isResourceSpecial(r)) 2378 return nv50_ir::TEX_TARGET_BUFFER; 2379 return tgsi::translateTexture(code->resources.at(r).target); 2380} 2381 2382Symbol * 2383Converter::getResourceBase(const int r) 2384{ 2385 Symbol *sym = NULL; 2386 2387 switch (r) { 2388 case TGSI_RESOURCE_GLOBAL: 2389 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 2390 info->io.auxCBSlot); 2391 break; 2392 case TGSI_RESOURCE_LOCAL: 2393 assert(prog->getType() == Program::TYPE_COMPUTE); 2394 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32, 2395 info->prop.cp.sharedOffset); 2396 break; 2397 case TGSI_RESOURCE_PRIVATE: 2398 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32, 2399 
info->bin.tlsSpace); 2400 break; 2401 case TGSI_RESOURCE_INPUT: 2402 assert(prog->getType() == Program::TYPE_COMPUTE); 2403 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32, 2404 info->prop.cp.inputOffset); 2405 break; 2406 default: 2407 sym = new_Symbol(prog, 2408 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot); 2409 break; 2410 } 2411 return sym; 2412} 2413 2414void 2415Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s) 2416{ 2417 const int arg = 2418 TexInstruction::Target(getResourceTarget(code, r)).getArgCount(); 2419 2420 for (int c = 0; c < arg; ++c) 2421 coords.push_back(fetchSrc(s, c)); 2422 2423 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk 2424 if (r == TGSI_RESOURCE_LOCAL || 2425 r == TGSI_RESOURCE_PRIVATE || 2426 r == TGSI_RESOURCE_INPUT) 2427 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS), 2428 coords[0]); 2429} 2430*/ 2431static inline int 2432partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) 2433{ 2434 int n = 0; 2435 2436 while (mask) { 2437 if (mask & 1) { 2438 size[n]++; 2439 } else { 2440 if (size[n]) 2441 comp[n = 1] = size[0] + 1; 2442 else 2443 comp[n]++; 2444 } 2445 mask >>= 1; 2446 } 2447 if (size[0] == 3) { 2448 n = 1; 2449 size[0] = (comp[0] == 1) ? 
1 : 2; 2450 size[1] = 3 - size[0]; 2451 comp[1] = comp[0] + size[0]; 2452 } 2453 return n + 1; 2454} 2455 2456static inline nv50_ir::TexTarget 2457getImageTarget(const tgsi::Source *code, int r) 2458{ 2459 return tgsi::translateTexture(code->images.at(r).target); 2460} 2461 2462static inline const nv50_ir::TexInstruction::ImgFormatDesc * 2463getImageFormat(const tgsi::Source *code, int r) 2464{ 2465 return &nv50_ir::TexInstruction::formatTable[ 2466 tgsi::translateImgFormat(code->images.at(r).format)]; 2467} 2468 2469void 2470Converter::getImageCoords(std::vector<Value *> &coords, int r, int s) 2471{ 2472 TexInstruction::Target t = 2473 TexInstruction::Target(getImageTarget(code, r)); 2474 const int arg = t.getDim() + (t.isArray() || t.isCube()); 2475 2476 for (int c = 0; c < arg; ++c) 2477 coords.push_back(fetchSrc(s, c)); 2478 2479 if (t.isMS()) 2480 coords.push_back(fetchSrc(s, 3)); 2481} 2482 2483// For raw loads, granularity is 4 byte. 2484// Usage of the texture read mask on OP_SULDP is not allowed. 
2485void 2486Converter::handleLOAD(Value *dst0[4]) 2487{ 2488 const int r = tgsi.getSrc(0).getIndex(0); 2489 int c; 2490 std::vector<Value *> off, src, ldv, def; 2491 2492 switch (tgsi.getSrc(0).getFile()) { 2493 case TGSI_FILE_BUFFER: 2494 case TGSI_FILE_MEMORY: 2495 for (c = 0; c < 4; ++c) { 2496 if (!dst0[c]) 2497 continue; 2498 2499 Value *off; 2500 Symbol *sym; 2501 uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4; 2502 2503 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { 2504 off = NULL; 2505 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 2506 tgsi.getSrc(1).getValueU32(0, info) + 2507 src0_component_offset); 2508 } else { 2509 // yzw are ignored for buffers 2510 off = fetchSrc(1, 0); 2511 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 2512 src0_component_offset); 2513 } 2514 2515 Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); 2516 ld->cache = tgsi.getCacheMode(); 2517 if (tgsi.getSrc(0).isIndirect(0)) 2518 ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); 2519 } 2520 break; 2521 case TGSI_FILE_IMAGE: { 2522 assert(!code->images[r].raw); 2523 2524 getImageCoords(off, r, 1); 2525 def.resize(4); 2526 2527 for (c = 0; c < 4; ++c) { 2528 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) 2529 def[c] = getScratch(); 2530 else 2531 def[c] = dst0[c]; 2532 } 2533 2534 TexInstruction *ld = 2535 mkTex(OP_SULDP, getImageTarget(code, r), code->images[r].slot, 0, 2536 def, off); 2537 ld->tex.mask = tgsi.getDst(0).getMask(); 2538 ld->tex.format = getImageFormat(code, r); 2539 ld->cache = tgsi.getCacheMode(); 2540 if (tgsi.getSrc(0).isIndirect(0)) 2541 ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); 2542 2543 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 2544 if (dst0[c] != def[c]) 2545 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); 2546 } 2547 break; 2548 default: 2549 assert(!"Unsupported srcFile for LOAD"); 2550 } 2551 2552/* Keep this around for now as reference when adding 
img support 2553 getResourceCoords(off, r, 1); 2554 2555 if (isResourceRaw(code, r)) { 2556 uint8_t mask = 0; 2557 uint8_t comp[2] = { 0, 0 }; 2558 uint8_t size[2] = { 0, 0 }; 2559 2560 Symbol *base = getResourceBase(r); 2561 2562 // determine the base and size of the at most 2 load ops 2563 for (c = 0; c < 4; ++c) 2564 if (!tgsi.getDst(0).isMasked(c)) 2565 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X); 2566 2567 int n = partitionLoadStore(comp, size, mask); 2568 2569 src = off; 2570 2571 def.resize(4); // index by component, the ones we need will be non-NULL 2572 for (c = 0; c < 4; ++c) { 2573 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c)) 2574 def[c] = dst0[c]; 2575 else 2576 if (mask & (1 << c)) 2577 def[c] = getScratch(); 2578 } 2579 2580 const bool useLd = isResourceSpecial(r) || 2581 (info->io.nv50styleSurfaces && 2582 code->resources[r].target == TGSI_TEXTURE_BUFFER); 2583 2584 for (int i = 0; i < n; ++i) { 2585 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]); 2586 2587 if (comp[i]) // adjust x component of source address if necessary 2588 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), 2589 off[0], mkImm(comp[i] * 4)); 2590 else 2591 src[0] = off[0]; 2592 2593 if (useLd) { 2594 Instruction *ld = 2595 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]); 2596 for (size_t c = 1; c < ldv.size(); ++c) 2597 ld->setDef(c, ldv[c]); 2598 } else { 2599 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot, 2600 0, ldv, src)->dType = typeOfSize(size[i] * 4); 2601 } 2602 } 2603 } else { 2604 def.resize(4); 2605 for (c = 0; c < 4; ++c) { 2606 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) 2607 def[c] = getScratch(); 2608 else 2609 def[c] = dst0[c]; 2610 } 2611 2612 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0, 2613 def, off); 2614 } 2615 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 2616 if (dst0[c] != def[c]) 2617 mkMov(dst0[c], 
def[tgsi.getSrc(0).getSwizzle(c)]); 2618*/ 2619} 2620 2621// For formatted stores, the write mask on OP_SUSTP can be used. 2622// Raw stores have to be split. 2623void 2624Converter::handleSTORE() 2625{ 2626 const int r = tgsi.getDst(0).getIndex(0); 2627 int c; 2628 std::vector<Value *> off, src, dummy; 2629 2630 switch (tgsi.getDst(0).getFile()) { 2631 case TGSI_FILE_BUFFER: 2632 case TGSI_FILE_MEMORY: 2633 for (c = 0; c < 4; ++c) { 2634 if (!(tgsi.getDst(0).getMask() & (1 << c))) 2635 continue; 2636 2637 Symbol *sym; 2638 Value *off; 2639 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { 2640 off = NULL; 2641 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 2642 tgsi.getSrc(0).getValueU32(0, info) + 4 * c); 2643 } else { 2644 // yzw are ignored for buffers 2645 off = fetchSrc(0, 0); 2646 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c); 2647 } 2648 2649 Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); 2650 st->cache = tgsi.getCacheMode(); 2651 if (tgsi.getDst(0).isIndirect(0)) 2652 st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0)); 2653 } 2654 break; 2655 case TGSI_FILE_IMAGE: { 2656 assert(!code->images[r].raw); 2657 2658 getImageCoords(off, r, 0); 2659 src = off; 2660 2661 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 2662 src.push_back(fetchSrc(1, c)); 2663 2664 TexInstruction *st = 2665 mkTex(OP_SUSTP, getImageTarget(code, r), code->images[r].slot, 2666 0, dummy, src); 2667 st->tex.mask = tgsi.getDst(0).getMask(); 2668 st->tex.format = getImageFormat(code, r); 2669 st->cache = tgsi.getCacheMode(); 2670 if (tgsi.getDst(0).isIndirect(0)) 2671 st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL)); 2672 } 2673 break; 2674 default: 2675 assert(!"Unsupported dstFile for STORE"); 2676 } 2677 2678/* Keep this around for now as reference when adding img support 2679 getResourceCoords(off, r, 0); 2680 src = off; 2681 const int s = src.size(); 2682 2683 if (isResourceRaw(code, r)) { 2684 uint8_t comp[2] = 
{ 0, 0 }; 2685 uint8_t size[2] = { 0, 0 }; 2686 2687 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask()); 2688 2689 Symbol *base = getResourceBase(r); 2690 2691 const bool useSt = isResourceSpecial(r) || 2692 (info->io.nv50styleSurfaces && 2693 code->resources[r].target == TGSI_TEXTURE_BUFFER); 2694 2695 for (int i = 0; i < n; ++i) { 2696 if (comp[i]) // adjust x component of source address if necessary 2697 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), 2698 off[0], mkImm(comp[i] * 4)); 2699 else 2700 src[0] = off[0]; 2701 2702 const DataType stTy = typeOfSize(size[i] * 4); 2703 2704 if (useSt) { 2705 Instruction *st = 2706 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i])); 2707 for (c = 1; c < size[i]; ++c) 2708 st->setSrc(1 + c, fetchSrc(1, comp[i] + c)); 2709 st->setIndirect(0, 0, src[0]); 2710 } else { 2711 // attach values to be stored 2712 src.resize(s + size[i]); 2713 for (c = 0; c < size[i]; ++c) 2714 src[s + c] = fetchSrc(1, comp[i] + c); 2715 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot, 2716 0, dummy, src)->setType(stTy); 2717 } 2718 } 2719 } else { 2720 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 2721 src.push_back(fetchSrc(1, c)); 2722 2723 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0, 2724 dummy, src)->tex.mask = tgsi.getDst(0).getMask(); 2725 } 2726*/ 2727} 2728 2729// XXX: These only work on resources with the single-component u32/s32 formats. 2730// Therefore the result is replicated. This might not be intended by TGSI, but 2731// operating on more than 1 component would produce undefined results because 2732// they do not exist. 
2733void 2734Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) 2735{ 2736 const int r = tgsi.getSrc(0).getIndex(0); 2737 std::vector<Value *> srcv; 2738 std::vector<Value *> defv; 2739 LValue *dst = getScratch(); 2740 2741 switch (tgsi.getSrc(0).getFile()) { 2742 case TGSI_FILE_BUFFER: 2743 case TGSI_FILE_MEMORY: 2744 for (int c = 0; c < 4; ++c) { 2745 if (!dst0[c]) 2746 continue; 2747 2748 Instruction *insn; 2749 Value *off = fetchSrc(1, c), *off2 = NULL; 2750 Value *sym; 2751 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) 2752 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 2753 tgsi.getSrc(1).getValueU32(c, info)); 2754 else 2755 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); 2756 if (tgsi.getSrc(0).isIndirect(0)) 2757 off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); 2758 if (subOp == NV50_IR_SUBOP_ATOM_CAS) 2759 insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c)); 2760 else 2761 insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); 2762 if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE) 2763 insn->setIndirect(0, 0, off); 2764 if (off2) 2765 insn->setIndirect(0, 1, off2); 2766 insn->subOp = subOp; 2767 } 2768 for (int c = 0; c < 4; ++c) 2769 if (dst0[c]) 2770 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov 2771 break; 2772 case TGSI_FILE_IMAGE: { 2773 assert(!code->images[r].raw); 2774 2775 getImageCoords(srcv, r, 1); 2776 defv.push_back(dst); 2777 srcv.push_back(fetchSrc(2, 0)); 2778 2779 if (subOp == NV50_IR_SUBOP_ATOM_CAS) 2780 srcv.push_back(fetchSrc(3, 0)); 2781 2782 TexInstruction *tex = mkTex(OP_SUREDP, getImageTarget(code, r), 2783 code->images[r].slot, 0, defv, srcv); 2784 tex->subOp = subOp; 2785 tex->tex.mask = 1; 2786 tex->tex.format = getImageFormat(code, r); 2787 tex->setType(ty); 2788 if (tgsi.getSrc(0).isIndirect(0)) 2789 tex->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); 2790 2791 for (int c = 0; c < 4; ++c) 2792 if (dst0[c]) 2793 dst0[c] = dst; // not 
equal to rDst so handleInstruction will do mkMov 2794 } 2795 break; 2796 default: 2797 assert(!"Unsupported srcFile for ATOM"); 2798 } 2799 2800/* Keep this around for now as reference when adding img support 2801 getResourceCoords(srcv, r, 1); 2802 2803 if (isResourceSpecial(r)) { 2804 assert(r != TGSI_RESOURCE_INPUT); 2805 Instruction *insn; 2806 insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0)); 2807 insn->subOp = subOp; 2808 if (subOp == NV50_IR_SUBOP_ATOM_CAS) 2809 insn->setSrc(2, fetchSrc(3, 0)); 2810 insn->setIndirect(0, 0, srcv.at(0)); 2811 } else { 2812 operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP; 2813 TexTarget targ = getResourceTarget(code, r); 2814 int idx = code->resources[r].slot; 2815 defv.push_back(dst); 2816 srcv.push_back(fetchSrc(2, 0)); 2817 if (subOp == NV50_IR_SUBOP_ATOM_CAS) 2818 srcv.push_back(fetchSrc(3, 0)); 2819 TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv); 2820 tex->subOp = subOp; 2821 tex->tex.mask = 1; 2822 tex->setType(ty); 2823 } 2824 2825 for (int c = 0; c < 4; ++c) 2826 if (dst0[c]) 2827 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov 2828*/ 2829} 2830 2831void 2832Converter::handleINTERP(Value *dst[4]) 2833{ 2834 // Check whether the input is linear. All other attributes ignored. 2835 Instruction *insn; 2836 Value *offset = NULL, *ptr = NULL, *w = NULL; 2837 Symbol *sym[4] = { NULL }; 2838 bool linear; 2839 operation op = OP_NOP; 2840 int c, mode = 0; 2841 2842 tgsi::Instruction::SrcRegister src = tgsi.getSrc(0); 2843 2844 // In some odd cases, in large part due to varying packing, the source 2845 // might not actually be an input. This is illegal TGSI, but it's easier to 2846 // account for it here than it is to fix it where the TGSI is being 2847 // generated. In that case, it's going to be a straight up mov (or sequence 2848 // of mov's) from the input in question. We follow the mov chain to see 2849 // which input we need to use. 
2850 if (src.getFile() != TGSI_FILE_INPUT) { 2851 if (src.isIndirect(0)) { 2852 ERROR("Ignoring indirect input interpolation\n"); 2853 return; 2854 } 2855 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 2856 Value *val = fetchSrc(0, c); 2857 assert(val->defs.size() == 1); 2858 insn = val->getInsn(); 2859 while (insn->op == OP_MOV) { 2860 assert(insn->getSrc(0)->defs.size() == 1); 2861 insn = insn->getSrc(0)->getInsn(); 2862 if (!insn) { 2863 ERROR("Miscompiling shader due to unhandled INTERP\n"); 2864 return; 2865 } 2866 } 2867 if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) { 2868 ERROR("Trying to interpolate non-input, this is not allowed.\n"); 2869 return; 2870 } 2871 sym[c] = insn->getSrc(0)->asSym(); 2872 assert(sym[c]); 2873 op = insn->op; 2874 mode = insn->ipa; 2875 } 2876 } else { 2877 if (src.isIndirect(0)) 2878 ptr = fetchSrc(src.getIndirect(0), 0, NULL); 2879 2880 // We can assume that the fixed index will point to an input of the same 2881 // interpolation type in case of an indirect. 2882 // TODO: Make use of ArrayID. 2883 linear = info->in[src.getIndex(0)].linear; 2884 if (linear) { 2885 op = OP_LINTERP; 2886 mode = NV50_IR_INTERP_LINEAR; 2887 } else { 2888 op = OP_PINTERP; 2889 mode = NV50_IR_INTERP_PERSPECTIVE; 2890 } 2891 } 2892 2893 switch (tgsi.getOpcode()) { 2894 case TGSI_OPCODE_INTERP_CENTROID: 2895 mode |= NV50_IR_INTERP_CENTROID; 2896 break; 2897 case TGSI_OPCODE_INTERP_SAMPLE: 2898 insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0)); 2899 insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; 2900 mode |= NV50_IR_INTERP_OFFSET; 2901 break; 2902 case TGSI_OPCODE_INTERP_OFFSET: { 2903 // The input in src1.xy is float, but we need a single 32-bit value 2904 // where the upper and lower 16 bits are encoded in S0.12 format. We need 2905 // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096, 2906 // and then convert to s32. 
2907 Value *offs[2]; 2908 for (c = 0; c < 2; c++) { 2909 offs[c] = getScratch(); 2910 mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f)); 2911 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); 2912 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); 2913 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); 2914 } 2915 offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(), 2916 offs[1], mkImm(0x1010), offs[0]); 2917 mode |= NV50_IR_INTERP_OFFSET; 2918 break; 2919 } 2920 } 2921 2922 if (op == OP_PINTERP) { 2923 if (offset) { 2924 w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset); 2925 mkOp1(OP_RCP, TYPE_F32, w, w); 2926 } else { 2927 w = fragCoord[3]; 2928 } 2929 } 2930 2931 2932 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 2933 insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c)); 2934 if (op == OP_PINTERP) 2935 insn->setSrc(1, w); 2936 if (ptr) 2937 insn->setIndirect(0, 0, ptr); 2938 if (offset) 2939 insn->setSrc(op == OP_PINTERP ? 
2 : 1, offset); 2940 2941 insn->setInterpolate(mode); 2942 } 2943} 2944 2945Converter::Subroutine * 2946Converter::getSubroutine(unsigned ip) 2947{ 2948 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); 2949 2950 if (it == sub.map.end()) 2951 it = sub.map.insert(std::make_pair( 2952 ip, Subroutine(new Function(prog, "SUB", ip)))).first; 2953 2954 return &it->second; 2955} 2956 2957Converter::Subroutine * 2958Converter::getSubroutine(Function *f) 2959{ 2960 unsigned ip = f->getLabel(); 2961 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); 2962 2963 if (it == sub.map.end()) 2964 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; 2965 2966 return &it->second; 2967} 2968 2969bool 2970Converter::isEndOfSubroutine(uint ip) 2971{ 2972 assert(ip < code->scan.num_instructions); 2973 tgsi::Instruction insn(&code->insns[ip]); 2974 return (insn.getOpcode() == TGSI_OPCODE_END || 2975 insn.getOpcode() == TGSI_OPCODE_ENDSUB || 2976 // does END occur at end of main or the very end ? 2977 insn.getOpcode() == TGSI_OPCODE_BGNSUB); 2978} 2979 2980bool 2981Converter::handleInstruction(const struct tgsi_full_instruction *insn) 2982{ 2983 Instruction *geni; 2984 2985 Value *dst0[4], *rDst0[4]; 2986 Value *src0, *src1, *src2, *src3; 2987 Value *val0, *val1; 2988 int c; 2989 2990 tgsi = tgsi::Instruction(insn); 2991 2992 bool useScratchDst = tgsi.checkDstSrcAliasing(); 2993 2994 operation op = tgsi.getOP(); 2995 dstTy = tgsi.inferDstType(); 2996 srcTy = tgsi.inferSrcType(); 2997 2998 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; 2999 3000 if (tgsi.dstCount()) { 3001 for (c = 0; c < 4; ++c) { 3002 rDst0[c] = acquireDst(0, c); 3003 dst0[c] = (useScratchDst && rDst0[c]) ? 
getScratch() : rDst0[c]; 3004 } 3005 } 3006 3007 switch (tgsi.getOpcode()) { 3008 case TGSI_OPCODE_ADD: 3009 case TGSI_OPCODE_UADD: 3010 case TGSI_OPCODE_AND: 3011 case TGSI_OPCODE_DIV: 3012 case TGSI_OPCODE_IDIV: 3013 case TGSI_OPCODE_UDIV: 3014 case TGSI_OPCODE_MAX: 3015 case TGSI_OPCODE_MIN: 3016 case TGSI_OPCODE_IMAX: 3017 case TGSI_OPCODE_IMIN: 3018 case TGSI_OPCODE_UMAX: 3019 case TGSI_OPCODE_UMIN: 3020 case TGSI_OPCODE_MOD: 3021 case TGSI_OPCODE_UMOD: 3022 case TGSI_OPCODE_MUL: 3023 case TGSI_OPCODE_UMUL: 3024 case TGSI_OPCODE_IMUL_HI: 3025 case TGSI_OPCODE_UMUL_HI: 3026 case TGSI_OPCODE_OR: 3027 case TGSI_OPCODE_SHL: 3028 case TGSI_OPCODE_ISHR: 3029 case TGSI_OPCODE_USHR: 3030 case TGSI_OPCODE_XOR: 3031 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3032 src0 = fetchSrc(0, c); 3033 src1 = fetchSrc(1, c); 3034 geni = mkOp2(op, dstTy, dst0[c], src0, src1); 3035 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); 3036 } 3037 break; 3038 case TGSI_OPCODE_MAD: 3039 case TGSI_OPCODE_UMAD: 3040 case TGSI_OPCODE_SAD: 3041 case TGSI_OPCODE_FMA: 3042 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3043 src0 = fetchSrc(0, c); 3044 src1 = fetchSrc(1, c); 3045 src2 = fetchSrc(2, c); 3046 mkOp3(op, dstTy, dst0[c], src0, src1, src2); 3047 } 3048 break; 3049 case TGSI_OPCODE_MOV: 3050 case TGSI_OPCODE_CEIL: 3051 case TGSI_OPCODE_FLR: 3052 case TGSI_OPCODE_TRUNC: 3053 case TGSI_OPCODE_RCP: 3054 case TGSI_OPCODE_SQRT: 3055 case TGSI_OPCODE_IABS: 3056 case TGSI_OPCODE_INEG: 3057 case TGSI_OPCODE_NOT: 3058 case TGSI_OPCODE_DDX: 3059 case TGSI_OPCODE_DDY: 3060 case TGSI_OPCODE_DDX_FINE: 3061 case TGSI_OPCODE_DDY_FINE: 3062 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3063 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c)); 3064 break; 3065 case TGSI_OPCODE_RSQ: 3066 src0 = fetchSrc(0, 0); 3067 val0 = getScratch(); 3068 mkOp1(OP_ABS, TYPE_F32, val0, src0); 3069 mkOp1(OP_RSQ, TYPE_F32, val0, val0); 3070 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3071 mkMov(dst0[c], val0); 3072 break; 3073 case 
TGSI_OPCODE_ARL: 3074 case TGSI_OPCODE_ARR: 3075 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3076 const RoundMode rnd = 3077 tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M; 3078 src0 = fetchSrc(0, c); 3079 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd; 3080 } 3081 break; 3082 case TGSI_OPCODE_UARL: 3083 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3084 mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); 3085 break; 3086 case TGSI_OPCODE_POW: 3087 val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); 3088 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3089 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); 3090 break; 3091 case TGSI_OPCODE_EX2: 3092 case TGSI_OPCODE_LG2: 3093 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0); 3094 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3095 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); 3096 break; 3097 case TGSI_OPCODE_COS: 3098 case TGSI_OPCODE_SIN: 3099 val0 = getScratch(); 3100 if (mask & 7) { 3101 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0)); 3102 mkOp1(op, TYPE_F32, val0, val0); 3103 for (c = 0; c < 3; ++c) 3104 if (dst0[c]) 3105 mkMov(dst0[c], val0); 3106 } 3107 if (dst0[3]) { 3108 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3)); 3109 mkOp1(op, TYPE_F32, dst0[3], val0); 3110 } 3111 break; 3112 case TGSI_OPCODE_SCS: 3113 if (mask & 3) { 3114 val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); 3115 if (dst0[0]) 3116 mkOp1(OP_COS, TYPE_F32, dst0[0], val0); 3117 if (dst0[1]) 3118 mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); 3119 } 3120 if (dst0[2]) 3121 loadImm(dst0[2], 0.0f); 3122 if (dst0[3]) 3123 loadImm(dst0[3], 1.0f); 3124 break; 3125 case TGSI_OPCODE_EXP: 3126 src0 = fetchSrc(0, 0); 3127 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); 3128 if (dst0[1]) 3129 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0); 3130 if (dst0[0]) 3131 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0); 3132 if (dst0[2]) 3133 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0); 3134 if (dst0[3]) 3135 loadImm(dst0[3], 1.0f); 3136 
break; 3137 case TGSI_OPCODE_LOG: 3138 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0)); 3139 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0); 3140 if (dst0[0] || dst0[1]) 3141 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0); 3142 if (dst0[1]) { 3143 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); 3144 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); 3145 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); 3146 } 3147 if (dst0[3]) 3148 loadImm(dst0[3], 1.0f); 3149 break; 3150 case TGSI_OPCODE_DP2: 3151 val0 = buildDot(2); 3152 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3153 mkMov(dst0[c], val0); 3154 break; 3155 case TGSI_OPCODE_DP3: 3156 val0 = buildDot(3); 3157 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3158 mkMov(dst0[c], val0); 3159 break; 3160 case TGSI_OPCODE_DP4: 3161 val0 = buildDot(4); 3162 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3163 mkMov(dst0[c], val0); 3164 break; 3165 case TGSI_OPCODE_DPH: 3166 val0 = buildDot(3); 3167 src1 = fetchSrc(1, 3); 3168 mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); 3169 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3170 mkMov(dst0[c], val0); 3171 break; 3172 case TGSI_OPCODE_DST: 3173 if (dst0[0]) 3174 loadImm(dst0[0], 1.0f); 3175 if (dst0[1]) { 3176 src0 = fetchSrc(0, 1); 3177 src1 = fetchSrc(1, 1); 3178 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); 3179 } 3180 if (dst0[2]) 3181 mkMov(dst0[2], fetchSrc(0, 2)); 3182 if (dst0[3]) 3183 mkMov(dst0[3], fetchSrc(1, 3)); 3184 break; 3185 case TGSI_OPCODE_LRP: 3186 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3187 src0 = fetchSrc(0, c); 3188 src1 = fetchSrc(1, c); 3189 src2 = fetchSrc(2, c); 3190 mkOp3(OP_MAD, TYPE_F32, dst0[c], 3191 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); 3192 } 3193 break; 3194 case TGSI_OPCODE_LIT: 3195 handleLIT(dst0); 3196 break; 3197 case TGSI_OPCODE_XPD: 3198 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3199 if (c < 3) { 3200 val0 = getSSA(); 3201 src0 = fetchSrc(1, (c + 1) % 3); 3202 src1 = fetchSrc(0, (c + 2) % 3); 
3203 mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); 3204 mkOp1(OP_NEG, TYPE_F32, val0, val0); 3205 3206 src0 = fetchSrc(0, (c + 1) % 3); 3207 src1 = fetchSrc(1, (c + 2) % 3); 3208 mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); 3209 } else { 3210 loadImm(dst0[c], 1.0f); 3211 } 3212 } 3213 break; 3214 case TGSI_OPCODE_ISSG: 3215 case TGSI_OPCODE_SSG: 3216 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3217 src0 = fetchSrc(0, c); 3218 val0 = getScratch(); 3219 val1 = getScratch(); 3220 mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero); 3221 mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero); 3222 if (srcTy == TYPE_F32) 3223 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); 3224 else 3225 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); 3226 } 3227 break; 3228 case TGSI_OPCODE_UCMP: 3229 srcTy = TYPE_U32; 3230 /* fallthrough */ 3231 case TGSI_OPCODE_CMP: 3232 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3233 src0 = fetchSrc(0, c); 3234 src1 = fetchSrc(1, c); 3235 src2 = fetchSrc(2, c); 3236 if (src1 == src2) 3237 mkMov(dst0[c], src1); 3238 else 3239 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? 
CC_LT : CC_NE, 3240 srcTy, dst0[c], srcTy, src1, src2, src0); 3241 } 3242 break; 3243 case TGSI_OPCODE_FRC: 3244 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3245 src0 = fetchSrc(0, c); 3246 val0 = getScratch(); 3247 mkOp1(OP_FLOOR, TYPE_F32, val0, src0); 3248 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); 3249 } 3250 break; 3251 case TGSI_OPCODE_ROUND: 3252 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3253 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) 3254 ->rnd = ROUND_NI; 3255 break; 3256 case TGSI_OPCODE_CLAMP: 3257 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3258 src0 = fetchSrc(0, c); 3259 src1 = fetchSrc(1, c); 3260 src2 = fetchSrc(2, c); 3261 val0 = getScratch(); 3262 mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); 3263 mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); 3264 } 3265 break; 3266 case TGSI_OPCODE_SLT: 3267 case TGSI_OPCODE_SGE: 3268 case TGSI_OPCODE_SEQ: 3269 case TGSI_OPCODE_SGT: 3270 case TGSI_OPCODE_SLE: 3271 case TGSI_OPCODE_SNE: 3272 case TGSI_OPCODE_FSEQ: 3273 case TGSI_OPCODE_FSGE: 3274 case TGSI_OPCODE_FSLT: 3275 case TGSI_OPCODE_FSNE: 3276 case TGSI_OPCODE_ISGE: 3277 case TGSI_OPCODE_ISLT: 3278 case TGSI_OPCODE_USEQ: 3279 case TGSI_OPCODE_USGE: 3280 case TGSI_OPCODE_USLT: 3281 case TGSI_OPCODE_USNE: 3282 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3283 src0 = fetchSrc(0, c); 3284 src1 = fetchSrc(1, c); 3285 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); 3286 } 3287 break; 3288 case TGSI_OPCODE_VOTE_ALL: 3289 case TGSI_OPCODE_VOTE_ANY: 3290 case TGSI_OPCODE_VOTE_EQ: 3291 val0 = new_LValue(func, FILE_PREDICATE); 3292 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3293 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero); 3294 mkOp1(op, dstTy, val0, val0) 3295 ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); 3296 mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0); 3297 } 3298 break; 3299 case TGSI_OPCODE_KILL_IF: 3300 val0 = new_LValue(func, FILE_PREDICATE); 3301 mask = 0; 3302 for (c = 0; c < 4; ++c) { 3303 const 
int s = tgsi.getSrc(0).getSwizzle(c); 3304 if (mask & (1 << s)) 3305 continue; 3306 mask |= 1 << s; 3307 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero); 3308 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); 3309 } 3310 break; 3311 case TGSI_OPCODE_KILL: 3312 mkOp(OP_DISCARD, TYPE_NONE, NULL); 3313 break; 3314 case TGSI_OPCODE_TEX: 3315 case TGSI_OPCODE_TXB: 3316 case TGSI_OPCODE_TXL: 3317 case TGSI_OPCODE_TXP: 3318 case TGSI_OPCODE_LODQ: 3319 // R S L C Dx Dy 3320 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); 3321 break; 3322 case TGSI_OPCODE_TXD: 3323 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); 3324 break; 3325 case TGSI_OPCODE_TG4: 3326 handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00); 3327 break; 3328 case TGSI_OPCODE_TEX2: 3329 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00); 3330 break; 3331 case TGSI_OPCODE_TXB2: 3332 case TGSI_OPCODE_TXL2: 3333 handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00); 3334 break; 3335 case TGSI_OPCODE_SAMPLE: 3336 case TGSI_OPCODE_SAMPLE_B: 3337 case TGSI_OPCODE_SAMPLE_D: 3338 case TGSI_OPCODE_SAMPLE_L: 3339 case TGSI_OPCODE_SAMPLE_C: 3340 case TGSI_OPCODE_SAMPLE_C_LZ: 3341 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); 3342 break; 3343 case TGSI_OPCODE_TXF: 3344 handleTXF(dst0, 1, 0x03); 3345 break; 3346 case TGSI_OPCODE_SAMPLE_I: 3347 handleTXF(dst0, 1, 0x03); 3348 break; 3349 case TGSI_OPCODE_SAMPLE_I_MS: 3350 handleTXF(dst0, 1, 0x20); 3351 break; 3352 case TGSI_OPCODE_TXQ: 3353 case TGSI_OPCODE_SVIEWINFO: 3354 handleTXQ(dst0, TXQ_DIMS, 1); 3355 break; 3356 case TGSI_OPCODE_TXQS: 3357 // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to 3358 // be in .x 3359 dst0[1] = dst0[2] = dst0[3] = NULL; 3360 std::swap(dst0[0], dst0[2]); 3361 handleTXQ(dst0, TXQ_TYPE, 0); 3362 std::swap(dst0[0], dst0[2]); 3363 break; 3364 case TGSI_OPCODE_FBFETCH: 3365 handleFBFETCH(dst0); 3366 break; 3367 case TGSI_OPCODE_F2I: 3368 case TGSI_OPCODE_F2U: 3369 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3370 
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z; 3371 break; 3372 case TGSI_OPCODE_I2F: 3373 case TGSI_OPCODE_U2F: 3374 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3375 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); 3376 break; 3377 case TGSI_OPCODE_PK2H: 3378 val0 = getScratch(); 3379 val1 = getScratch(); 3380 mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0)); 3381 mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1)); 3382 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 3383 mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0); 3384 break; 3385 case TGSI_OPCODE_UP2H: 3386 src0 = fetchSrc(0, 0); 3387 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3388 geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0); 3389 geni->subOp = c & 1; 3390 } 3391 break; 3392 case TGSI_OPCODE_EMIT: 3393 /* export the saved viewport index */ 3394 if (viewport != NULL) { 3395 Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, 3396 info->out[info->io.viewportId].slot[0] * 4); 3397 mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); 3398 } 3399 /* fallthrough */ 3400 case TGSI_OPCODE_ENDPRIM: 3401 { 3402 // get vertex stream (must be immediate) 3403 unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); 3404 if (stream && op == OP_RESTART) 3405 break; 3406 if (info->prop.gp.maxVertices == 0) 3407 break; 3408 src0 = mkImm(stream); 3409 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; 3410 break; 3411 } 3412 case TGSI_OPCODE_IF: 3413 case TGSI_OPCODE_UIF: 3414 { 3415 BasicBlock *ifBB = new BasicBlock(func); 3416 3417 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); 3418 condBBs.push(bb); 3419 joinBBs.push(bb); 3420 3421 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy); 3422 3423 setPosition(ifBB, true); 3424 } 3425 break; 3426 case TGSI_OPCODE_ELSE: 3427 { 3428 BasicBlock *elseBB = new BasicBlock(func); 3429 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); 3430 3431 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); 
3432 condBBs.push(bb); 3433 3434 forkBB->getExit()->asFlow()->target.bb = elseBB; 3435 if (!bb->isTerminated()) 3436 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); 3437 3438 setPosition(elseBB, true); 3439 } 3440 break; 3441 case TGSI_OPCODE_ENDIF: 3442 { 3443 BasicBlock *convBB = new BasicBlock(func); 3444 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); 3445 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p); 3446 3447 if (!bb->isTerminated()) { 3448 // we only want join if none of the clauses ended with CONT/BREAK/RET 3449 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) 3450 insertConvergenceOps(convBB, forkBB); 3451 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL); 3452 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); 3453 } 3454 3455 if (prevBB->getExit()->op == OP_BRA) { 3456 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); 3457 prevBB->getExit()->asFlow()->target.bb = convBB; 3458 } 3459 setPosition(convBB, true); 3460 } 3461 break; 3462 case TGSI_OPCODE_BGNLOOP: 3463 { 3464 BasicBlock *lbgnBB = new BasicBlock(func); 3465 BasicBlock *lbrkBB = new BasicBlock(func); 3466 3467 loopBBs.push(lbgnBB); 3468 breakBBs.push(lbrkBB); 3469 if (loopBBs.getSize() > func->loopNestingBound) 3470 func->loopNestingBound++; 3471 3472 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL); 3473 3474 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE); 3475 setPosition(lbgnBB, true); 3476 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL); 3477 } 3478 break; 3479 case TGSI_OPCODE_ENDLOOP: 3480 { 3481 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p); 3482 3483 if (!bb->isTerminated()) { 3484 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); 3485 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); 3486 } 3487 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true); 3488 3489 // If the loop never breaks (e.g. only has RET's inside), then there 3490 // will be no way to get to the break bb. 
However BGNLOOP will have 3491 // already made a PREBREAK to it, so it must be in the CFG. 3492 if (getBB()->cfg.incidentCount() == 0) 3493 loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE); 3494 } 3495 break; 3496 case TGSI_OPCODE_BRK: 3497 { 3498 if (bb->isTerminated()) 3499 break; 3500 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); 3501 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL); 3502 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS); 3503 } 3504 break; 3505 case TGSI_OPCODE_CONT: 3506 { 3507 if (bb->isTerminated()) 3508 break; 3509 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); 3510 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); 3511 contBB->explicitCont = true; 3512 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); 3513 } 3514 break; 3515 case TGSI_OPCODE_BGNSUB: 3516 { 3517 Subroutine *s = getSubroutine(ip); 3518 BasicBlock *entry = new BasicBlock(s->f); 3519 BasicBlock *leave = new BasicBlock(s->f); 3520 3521 // multiple entrypoints possible, keep the graph connected 3522 if (prog->getType() == Program::TYPE_COMPUTE) 3523 prog->main->call.attach(&s->f->call, Graph::Edge::TREE); 3524 3525 sub.cur = s; 3526 s->f->setEntry(entry); 3527 s->f->setExit(leave); 3528 setPosition(entry, true); 3529 return true; 3530 } 3531 case TGSI_OPCODE_ENDSUB: 3532 { 3533 sub.cur = getSubroutine(prog->main); 3534 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true); 3535 return true; 3536 } 3537 case TGSI_OPCODE_CAL: 3538 { 3539 Subroutine *s = getSubroutine(tgsi.getLabel()); 3540 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL); 3541 func->call.attach(&s->f->call, Graph::Edge::TREE); 3542 return true; 3543 } 3544 case TGSI_OPCODE_RET: 3545 { 3546 if (bb->isTerminated()) 3547 return true; 3548 BasicBlock *leave = BasicBlock::get(func->cfgExit); 3549 3550 if (!isEndOfSubroutine(ip + 1)) { 3551 // insert a PRERET at the entry if this is an early return 3552 // (only needed for sharing code in the epilogue) 3553 BasicBlock *root = 
BasicBlock::get(func->cfg.getRoot()); 3554 if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) { 3555 BasicBlock *pos = getBB(); 3556 setPosition(root, false); 3557 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; 3558 setPosition(pos, true); 3559 } 3560 } 3561 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; 3562 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); 3563 } 3564 break; 3565 case TGSI_OPCODE_END: 3566 { 3567 // attach and generate epilogue code 3568 BasicBlock *epilogue = BasicBlock::get(func->cfgExit); 3569 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); 3570 setPosition(epilogue, true); 3571 if (prog->getType() == Program::TYPE_FRAGMENT) 3572 exportOutputs(); 3573 if (info->io.genUserClip > 0) 3574 handleUserClipPlanes(); 3575 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; 3576 } 3577 break; 3578 case TGSI_OPCODE_SWITCH: 3579 case TGSI_OPCODE_CASE: 3580 ERROR("switch/case opcode encountered, should have been lowered\n"); 3581 abort(); 3582 break; 3583 case TGSI_OPCODE_LOAD: 3584 handleLOAD(dst0); 3585 break; 3586 case TGSI_OPCODE_STORE: 3587 handleSTORE(); 3588 break; 3589 case TGSI_OPCODE_BARRIER: 3590 geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); 3591 geni->fixed = 1; 3592 geni->subOp = NV50_IR_SUBOP_BAR_SYNC; 3593 break; 3594 case TGSI_OPCODE_MFENCE: 3595 case TGSI_OPCODE_LFENCE: 3596 case TGSI_OPCODE_SFENCE: 3597 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); 3598 geni->fixed = 1; 3599 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); 3600 break; 3601 case TGSI_OPCODE_MEMBAR: 3602 { 3603 uint32_t level = tgsi.getSrc(0).getValueU32(0, info); 3604 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); 3605 geni->fixed = 1; 3606 if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED))) 3607 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA); 3608 else 3609 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL); 3610 } 3611 break; 3612 case TGSI_OPCODE_ATOMUADD: 3613 case TGSI_OPCODE_ATOMXCHG: 3614 case TGSI_OPCODE_ATOMCAS: 3615 case 
TGSI_OPCODE_ATOMAND: 3616 case TGSI_OPCODE_ATOMOR: 3617 case TGSI_OPCODE_ATOMXOR: 3618 case TGSI_OPCODE_ATOMUMIN: 3619 case TGSI_OPCODE_ATOMIMIN: 3620 case TGSI_OPCODE_ATOMUMAX: 3621 case TGSI_OPCODE_ATOMIMAX: 3622 handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); 3623 break; 3624 case TGSI_OPCODE_RESQ: 3625 if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { 3626 geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0], 3627 makeSym(tgsi.getSrc(0).getFile(), 3628 tgsi.getSrc(0).getIndex(0), -1, 0, 0)); 3629 if (tgsi.getSrc(0).isIndirect(0)) 3630 geni->setIndirect(0, 1, 3631 fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); 3632 } else { 3633 assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE); 3634 3635 TexInstruction *texi = new_TexInstruction(func, OP_SUQ); 3636 for (int c = 0, d = 0; c < 4; ++c) { 3637 if (dst0[c]) { 3638 texi->setDef(d++, dst0[c]); 3639 texi->tex.mask |= 1 << c; 3640 } 3641 } 3642 texi->tex.r = tgsi.getSrc(0).getIndex(0); 3643 texi->tex.target = getImageTarget(code, texi->tex.r); 3644 bb->insertTail(texi); 3645 3646 if (tgsi.getSrc(0).isIndirect(0)) 3647 texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); 3648 } 3649 break; 3650 case TGSI_OPCODE_IBFE: 3651 case TGSI_OPCODE_UBFE: 3652 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3653 src0 = fetchSrc(0, c); 3654 val0 = getScratch(); 3655 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && 3656 tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { 3657 loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) | 3658 tgsi.getSrc(1).getValueU32(c, info)); 3659 } else { 3660 src1 = fetchSrc(1, c); 3661 src2 = fetchSrc(2, c); 3662 mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1); 3663 } 3664 mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0); 3665 } 3666 break; 3667 case TGSI_OPCODE_BFI: 3668 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3669 src0 = fetchSrc(0, c); 3670 src1 = fetchSrc(1, c); 3671 src2 = fetchSrc(2, c); 3672 src3 = fetchSrc(3, c); 3673 val0 = getScratch(); 3674 
mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2); 3675 mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0); 3676 } 3677 break; 3678 case TGSI_OPCODE_LSB: 3679 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3680 src0 = fetchSrc(0, c); 3681 val0 = getScratch(); 3682 geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000)); 3683 geni->subOp = NV50_IR_SUBOP_EXTBF_REV; 3684 geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0); 3685 geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; 3686 } 3687 break; 3688 case TGSI_OPCODE_IMSB: 3689 case TGSI_OPCODE_UMSB: 3690 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3691 src0 = fetchSrc(0, c); 3692 mkOp1(OP_BFIND, srcTy, dst0[c], src0); 3693 } 3694 break; 3695 case TGSI_OPCODE_BREV: 3696 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3697 src0 = fetchSrc(0, c); 3698 geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); 3699 geni->subOp = NV50_IR_SUBOP_EXTBF_REV; 3700 } 3701 break; 3702 case TGSI_OPCODE_POPC: 3703 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3704 src0 = fetchSrc(0, c); 3705 mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0); 3706 } 3707 break; 3708 case TGSI_OPCODE_INTERP_CENTROID: 3709 case TGSI_OPCODE_INTERP_SAMPLE: 3710 case TGSI_OPCODE_INTERP_OFFSET: 3711 handleINTERP(dst0); 3712 break; 3713 case TGSI_OPCODE_D2I: 3714 case TGSI_OPCODE_D2U: 3715 case TGSI_OPCODE_D2F: { 3716 int pos = 0; 3717 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3718 Value *dreg = getSSA(8); 3719 src0 = fetchSrc(0, pos); 3720 src1 = fetchSrc(0, pos + 1); 3721 mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1); 3722 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg); 3723 if (!isFloatType(dstTy)) 3724 cvt->rnd = ROUND_Z; 3725 pos += 2; 3726 } 3727 break; 3728 } 3729 case TGSI_OPCODE_I2D: 3730 case TGSI_OPCODE_U2D: 3731 case TGSI_OPCODE_F2D: 3732 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3733 Value *dreg = getSSA(8); 3734 mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); 3735 mkSplit(&dst0[c], 4, dreg); 3736 c++; 3737 } 3738 break; 3739 case 
TGSI_OPCODE_DABS: 3740 case TGSI_OPCODE_DNEG: 3741 case TGSI_OPCODE_DRCP: 3742 case TGSI_OPCODE_DSQRT: 3743 case TGSI_OPCODE_DRSQ: 3744 case TGSI_OPCODE_DTRUNC: 3745 case TGSI_OPCODE_DCEIL: 3746 case TGSI_OPCODE_DFLR: 3747 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3748 src0 = getSSA(8); 3749 Value *dst = getSSA(8), *tmp[2]; 3750 tmp[0] = fetchSrc(0, c); 3751 tmp[1] = fetchSrc(0, c + 1); 3752 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3753 mkOp1(op, dstTy, dst, src0); 3754 mkSplit(&dst0[c], 4, dst); 3755 c++; 3756 } 3757 break; 3758 case TGSI_OPCODE_DFRAC: 3759 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3760 src0 = getSSA(8); 3761 Value *dst = getSSA(8), *tmp[2]; 3762 tmp[0] = fetchSrc(0, c); 3763 tmp[1] = fetchSrc(0, c + 1); 3764 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3765 mkOp1(OP_FLOOR, TYPE_F64, dst, src0); 3766 mkOp2(OP_SUB, TYPE_F64, dst, src0, dst); 3767 mkSplit(&dst0[c], 4, dst); 3768 c++; 3769 } 3770 break; 3771 case TGSI_OPCODE_DSLT: 3772 case TGSI_OPCODE_DSGE: 3773 case TGSI_OPCODE_DSEQ: 3774 case TGSI_OPCODE_DSNE: { 3775 int pos = 0; 3776 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3777 Value *tmp[2]; 3778 3779 src0 = getSSA(8); 3780 src1 = getSSA(8); 3781 tmp[0] = fetchSrc(0, pos); 3782 tmp[1] = fetchSrc(0, pos + 1); 3783 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3784 tmp[0] = fetchSrc(1, pos); 3785 tmp[1] = fetchSrc(1, pos + 1); 3786 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); 3787 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); 3788 pos += 2; 3789 } 3790 break; 3791 } 3792 case TGSI_OPCODE_DADD: 3793 case TGSI_OPCODE_DMUL: 3794 case TGSI_OPCODE_DDIV: 3795 case TGSI_OPCODE_DMAX: 3796 case TGSI_OPCODE_DMIN: 3797 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3798 src0 = getSSA(8); 3799 src1 = getSSA(8); 3800 Value *dst = getSSA(8), *tmp[2]; 3801 tmp[0] = fetchSrc(0, c); 3802 tmp[1] = fetchSrc(0, c + 1); 3803 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3804 tmp[0] = fetchSrc(1, c); 3805 tmp[1] = fetchSrc(1, c 
+ 1); 3806 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); 3807 mkOp2(op, dstTy, dst, src0, src1); 3808 mkSplit(&dst0[c], 4, dst); 3809 c++; 3810 } 3811 break; 3812 case TGSI_OPCODE_DMAD: 3813 case TGSI_OPCODE_DFMA: 3814 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3815 src0 = getSSA(8); 3816 src1 = getSSA(8); 3817 src2 = getSSA(8); 3818 Value *dst = getSSA(8), *tmp[2]; 3819 tmp[0] = fetchSrc(0, c); 3820 tmp[1] = fetchSrc(0, c + 1); 3821 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3822 tmp[0] = fetchSrc(1, c); 3823 tmp[1] = fetchSrc(1, c + 1); 3824 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); 3825 tmp[0] = fetchSrc(2, c); 3826 tmp[1] = fetchSrc(2, c + 1); 3827 mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]); 3828 mkOp3(op, dstTy, dst, src0, src1, src2); 3829 mkSplit(&dst0[c], 4, dst); 3830 c++; 3831 } 3832 break; 3833 case TGSI_OPCODE_DROUND: 3834 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3835 src0 = getSSA(8); 3836 Value *dst = getSSA(8), *tmp[2]; 3837 tmp[0] = fetchSrc(0, c); 3838 tmp[1] = fetchSrc(0, c + 1); 3839 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3840 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0) 3841 ->rnd = ROUND_NI; 3842 mkSplit(&dst0[c], 4, dst); 3843 c++; 3844 } 3845 break; 3846 case TGSI_OPCODE_DSSG: 3847 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 3848 src0 = getSSA(8); 3849 Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2]; 3850 tmp[0] = fetchSrc(0, c); 3851 tmp[1] = fetchSrc(0, c + 1); 3852 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); 3853 3854 val0 = getScratch(); 3855 val1 = getScratch(); 3856 // The zero is wrong here since it's only 32-bit, but it works out in 3857 // the end since it gets replaced with $r63. 
3858 mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero); 3859 mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero); 3860 mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1); 3861 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32); 3862 mkSplit(&dst0[c], 4, dst); 3863 c++; 3864 } 3865 break; 3866 default: 3867 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); 3868 assert(0); 3869 break; 3870 } 3871 3872 if (tgsi.dstCount()) { 3873 for (c = 0; c < 4; ++c) { 3874 if (!dst0[c]) 3875 continue; 3876 if (dst0[c] != rDst0[c]) 3877 mkMov(rDst0[c], dst0[c]); 3878 storeDst(0, c, rDst0[c]); 3879 } 3880 } 3881 vtxBaseValid = 0; 3882 3883 return true; 3884} 3885 3886void 3887Converter::handleUserClipPlanes() 3888{ 3889 Value *res[8]; 3890 int n, i, c; 3891 3892 for (c = 0; c < 4; ++c) { 3893 for (i = 0; i < info->io.genUserClip; ++i) { 3894 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, 3895 TYPE_F32, info->io.ucpBase + i * 16 + c * 4); 3896 Value *ucp = mkLoadv(TYPE_F32, sym, NULL); 3897 if (c == 0) 3898 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); 3899 else 3900 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); 3901 } 3902 } 3903 3904 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; 3905 3906 for (i = 0; i < info->io.genUserClip; ++i) { 3907 n = i / 4 + first; 3908 c = i % 4; 3909 Symbol *sym = 3910 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); 3911 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); 3912 } 3913} 3914 3915void 3916Converter::exportOutputs() 3917{ 3918 if (info->io.alphaRefBase) { 3919 for (unsigned int i = 0; i < info->numOutputs; ++i) { 3920 if (info->out[i].sn != TGSI_SEMANTIC_COLOR || 3921 info->out[i].si != 0) 3922 continue; 3923 const unsigned int c = 3; 3924 if (!oData.exists(sub.cur->values, i, c)) 3925 continue; 3926 Value *val = oData.load(sub.cur->values, i, c, NULL); 3927 if (!val) 3928 continue; 3929 3930 Symbol *ref = mkSymbol(FILE_MEMORY_CONST, 
info->io.auxCBSlot, 3931 TYPE_U32, info->io.alphaRefBase); 3932 Value *pred = new_LValue(func, FILE_PREDICATE); 3933 mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val, 3934 mkLoadv(TYPE_U32, ref, NULL)) 3935 ->subOp = 1; 3936 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred); 3937 } 3938 } 3939 3940 for (unsigned int i = 0; i < info->numOutputs; ++i) { 3941 for (unsigned int c = 0; c < 4; ++c) { 3942 if (!oData.exists(sub.cur->values, i, c)) 3943 continue; 3944 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, 3945 info->out[i].slot[c] * 4); 3946 Value *val = oData.load(sub.cur->values, i, c, NULL); 3947 if (val) { 3948 if (info->out[i].sn == TGSI_SEMANTIC_POSITION) 3949 mkOp1(OP_SAT, TYPE_F32, val, val); 3950 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); 3951 } 3952 } 3953 } 3954} 3955 3956Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), 3957 code(code), 3958 tgsi(NULL), 3959 tData(this), lData(this), aData(this), pData(this), oData(this) 3960{ 3961 info = code->info; 3962 3963 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); 3964 const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); 3965 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); 3966 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); 3967 3968 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0); 3969 lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0); 3970 pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); 3971 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0); 3972 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); 3973 3974 zero = mkImm((uint32_t)0); 3975 3976 vtxBaseValid = 0; 3977} 3978 3979Converter::~Converter() 3980{ 3981} 3982 3983inline const Converter::Location * 3984Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v) 3985{ 3986 ValueMap::l_iterator it = s->values.l.find(v); 3987 return it == s->values.l.end() ? 
NULL : &it->second; 3988} 3989 3990template<typename T> inline void 3991Converter::BindArgumentsPass::updateCallArgs( 3992 Instruction *i, void (Instruction::*setArg)(int, Value *), 3993 T (Function::*proto)) 3994{ 3995 Function *g = i->asFlow()->target.fn; 3996 Subroutine *subg = conv.getSubroutine(g); 3997 3998 for (unsigned a = 0; a < (g->*proto).size(); ++a) { 3999 Value *v = (g->*proto)[a].get(); 4000 const Converter::Location &l = *getValueLocation(subg, v); 4001 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx); 4002 4003 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c)); 4004 } 4005} 4006 4007template<typename T> inline void 4008Converter::BindArgumentsPass::updatePrototype( 4009 BitSet *set, void (Function::*updateSet)(), T (Function::*proto)) 4010{ 4011 (func->*updateSet)(); 4012 4013 for (unsigned i = 0; i < set->getSize(); ++i) { 4014 Value *v = func->getLValue(i); 4015 const Converter::Location *l = getValueLocation(sub, v); 4016 4017 // only include values with a matching TGSI register 4018 if (set->test(i) && l && !conv.code->locals.count(*l)) 4019 (func->*proto).push_back(v); 4020 } 4021} 4022 4023bool 4024Converter::BindArgumentsPass::visit(Function *f) 4025{ 4026 sub = conv.getSubroutine(f); 4027 4028 for (ArrayList::Iterator bi = f->allBBlocks.iterator(); 4029 !bi.end(); bi.next()) { 4030 for (Instruction *i = BasicBlock::get(bi)->getFirst(); 4031 i; i = i->next) { 4032 if (i->op == OP_CALL && !i->asFlow()->builtin) { 4033 updateCallArgs(i, &Instruction::setSrc, &Function::ins); 4034 updateCallArgs(i, &Instruction::setDef, &Function::outs); 4035 } 4036 } 4037 } 4038 4039 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) 4040 return true; 4041 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet, 4042 &Function::buildLiveSets, &Function::ins); 4043 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet, 4044 &Function::buildDefSets, &Function::outs); 4045 4046 return true; 4047} 4048 4049bool 
4050Converter::run() 4051{ 4052 BasicBlock *entry = new BasicBlock(prog->main); 4053 BasicBlock *leave = new BasicBlock(prog->main); 4054 4055 prog->main->setEntry(entry); 4056 prog->main->setExit(leave); 4057 4058 setPosition(entry, true); 4059 sub.cur = getSubroutine(prog->main); 4060 4061 if (info->io.genUserClip > 0) { 4062 for (int c = 0; c < 4; ++c) 4063 clipVtx[c] = getScratch(); 4064 } 4065 4066 switch (prog->getType()) { 4067 case Program::TYPE_TESSELLATION_CONTROL: 4068 outBase = mkOp2v( 4069 OP_SUB, TYPE_U32, getSSA(), 4070 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)), 4071 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0))); 4072 break; 4073 case Program::TYPE_FRAGMENT: { 4074 Symbol *sv = mkSysVal(SV_POSITION, 3); 4075 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); 4076 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); 4077 break; 4078 } 4079 default: 4080 break; 4081 } 4082 4083 if (info->io.viewportId >= 0) 4084 viewport = getScratch(); 4085 else 4086 viewport = NULL; 4087 4088 for (ip = 0; ip < code->scan.num_instructions; ++ip) { 4089 if (!handleInstruction(&code->insns[ip])) 4090 return false; 4091 } 4092 4093 if (!BindArgumentsPass(*this).run(prog)) 4094 return false; 4095 4096 return true; 4097} 4098 4099} // unnamed namespace 4100 4101namespace nv50_ir { 4102 4103bool 4104Program::makeFromTGSI(struct nv50_ir_prog_info *info) 4105{ 4106 tgsi::Source src(info); 4107 if (!src.scanSource()) 4108 return false; 4109 tlsSize = info->bin.tlsSpace; 4110 4111 Converter builder(this, &src); 4112 return builder.run(); 4113} 4114 4115} // namespace nv50_ir 4116