// sm4_to_tgsi.cpp revision db6f1d0436b66435bac5e2b6db5d2f4e07e80473
1/************************************************************************** 2 * 3 * Copyright 2010 Luca Barbieri 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 * 25 **************************************************************************/ 26 27#include "d3d1xstutil.h" 28#include "sm4.h" 29#include "tgsi/tgsi_ureg.h" 30#include <vector> 31 32#if 1 33#define check(x) assert(x) 34#define fail(x) assert(0 && (x)) 35#else 36#define check(x) do {if(!(x)) throw(#x);} while(0) 37#define fail(x) throw(x) 38#endif 39 40struct tgsi_interpolation 41{ 42 unsigned interpolation; 43 bool centroid; 44}; 45 46static tgsi_interpolation sm4_to_pipe_interpolation[] = 47{ 48 {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */ 49 {TGSI_INTERPOLATE_CONSTANT, false}, 50 {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */ 51 {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */ 52 {TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */ 53 {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */ 54 55 // Added in D3D10.1 56 {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */ 57 {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */ 58}; 59 60static int sm4_to_pipe_sv[] = 61{ 62 -1, 63 TGSI_SEMANTIC_POSITION, 64 -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */ 65 -1, /*TGSI_SEMANTIC_CULL_DISTANCE */ 66 -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */ 67 -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */ 68 -1, /*TGSI_SEMANTIC_VERTEXID,*/ 69 TGSI_SEMANTIC_PRIMID, 70 TGSI_SEMANTIC_INSTANCEID, 71 TGSI_SEMANTIC_FACE, 72 -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/ 73}; 74 75struct sm4_to_tgsi_converter 76{ 77 struct ureg_program* ureg; 78 std::vector<struct ureg_dst> temps; 79 std::vector<struct ureg_dst> outputs; 80 std::vector<struct ureg_src> inputs; 81 std::vector<struct ureg_src> samplers; 82 std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison 83 std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison 84 std::vector<std::pair<unsigned, unsigned> > loops; 85 sm4_insn* insn; 86 struct sm4_program& program; 87 std::vector<unsigned> sm4_to_tgsi_insn_num; 88 
std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num; 89 bool in_sub; 90 bool avoid_txf; 91 bool avoid_int; 92 93 sm4_to_tgsi_converter(struct sm4_program& program) 94 : program(program) 95 { 96 avoid_txf = true; 97 avoid_int = false; 98 } 99 100 struct ureg_dst _reg(sm4_op& op) 101 { 102 switch(op.file) 103 { 104 case SM4_FILE_NULL: 105 { 106 struct ureg_dst d; 107 memset(&d, 0, sizeof(d)); 108 d.File = TGSI_FILE_NULL; 109 return d; 110 } 111 case SM4_FILE_TEMP: 112 check(op.has_simple_index()); 113 check(op.indices[0].disp < temps.size()); 114 return temps[op.indices[0].disp]; 115 case SM4_FILE_OUTPUT: 116 check(op.has_simple_index()); 117 check(op.indices[0].disp < outputs.size()); 118 return outputs[op.indices[0].disp]; 119 default: 120 check(0); 121 return ureg_dst_undef(); 122 } 123 } 124 125 struct ureg_dst _dst(unsigned i = 0) 126 { 127 check(i < insn->num_ops); 128 sm4_op& op = *insn->ops[i]; 129 check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR); 130 struct ureg_dst d = ureg_writemask(_reg(op), op.mask); 131 if(insn->insn.sat) 132 d = ureg_saturate(d); 133 return d; 134 } 135 136 struct ureg_src _src(unsigned i) 137 { 138 check(i < insn->num_ops); 139 sm4_op& op = *insn->ops[i]; 140 struct ureg_src s; 141 switch(op.file) 142 { 143 case SM4_FILE_IMMEDIATE32: 144 s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32); 145 break; 146 case SM4_FILE_INPUT: 147 check(op.is_index_simple(0)); 148 check(op.num_indices == 1 || op.num_indices == 2); 149 // TODO: is this correct, or are incorrectly swapping the two indices in the GS case? 
150 check(op.indices[op.num_indices - 1].disp < inputs.size()); 151 s = inputs[op.indices[op.num_indices - 1].disp]; 152 if(op.num_indices == 2) 153 { 154 s.Dimension = 1; 155 s.DimensionIndex = op.indices[0].disp; 156 } 157 break; 158 case SM4_FILE_CONSTANT_BUFFER: 159 // TODO: indirect addressing 160 check(op.num_indices == 2); 161 check(op.is_index_simple(0)); 162 check(op.is_index_simple(1)); 163 s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp); 164 s.Dimension = 1; 165 s.DimensionIndex = op.indices[0].disp; 166 break; 167 default: 168 s = ureg_src(_reg(op)); 169 break; 170 } 171 if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR) 172 s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]); 173 else 174 { 175 /* immediates are masked to show needed values */ 176 check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64); 177 } 178 if(op.abs) 179 s = ureg_abs(s); 180 if(op.neg) 181 s = ureg_negate(s); 182 return s; 183 }; 184 185 int _idx(sm4_file file, unsigned i = 0) 186 { 187 check(i < insn->num_ops); 188 sm4_op& op = *insn->ops[i]; 189 check(op.file == file); 190 check(op.has_simple_index()); 191 return (int)op.indices[0].disp; 192 } 193 194 int _texslot(bool have_sampler = true) 195 { 196 std::map<std::pair<int, int>, int>::iterator i; 197 i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? 
_idx(SM4_FILE_SAMPLER, 3) : -1)); 198 check(i != program.resource_sampler_to_slot.end()); 199 return i->second; 200 } 201 202 unsigned tex_target(unsigned texslot) 203 { 204 unsigned mode = sampler_modes[program.slot_to_sampler[texslot]]; 205 unsigned target; 206 if(mode) 207 target = targets[program.slot_to_resource[texslot]].second; 208 else 209 target = targets[program.slot_to_resource[texslot]].first; 210 check(target); 211 return target; 212 } 213 214 std::vector<struct ureg_dst> insn_tmps; 215 216 struct ureg_dst _tmp() 217 { 218 struct ureg_dst t = ureg_DECL_temporary(ureg); 219 insn_tmps.push_back(t); 220 return t; 221 } 222 223 struct ureg_dst _tmp(struct ureg_dst d) 224 { 225 if(d.File == TGSI_FILE_TEMPORARY) 226 return d; 227 else 228 return ureg_writemask(_tmp(), d.WriteMask); 229 } 230 231#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break 232#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break 233#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break 234#define OP1(n) OP1_(n, n) 235#define OP2(n) OP2_(n, n) 236#define OP3(n) OP3_(n, n) 237#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break; 238 239 void translate_insns(unsigned begin, unsigned end) 240 { 241 for(unsigned insn_num = begin; insn_num < end; ++insn_num) 242 { 243 sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg); 244 unsigned label; 245 insn = program.insns[insn_num]; 246 bool ok; 247 ok = true; 248 switch(insn->opcode) 249 { 250 // trivial instructions 251 case SM4_OPCODE_NOP: 252 break; 253 OP1(MOV); 254 255 // float 256 OP2(ADD); 257 OP2(MUL); 258 OP3(MAD); 259 OP2(DIV); 260 OP1(FRC); 261 OP1(RCP); 262 OP2(MIN); 263 OP2(MAX); 264 OP2_(LT, SLT); 265 OP2_(GE, SGE); 266 OP2_(EQ, SEQ); 267 OP2_(NE, SNE); 268 269 // bitwise 270 OP1(NOT); 271 OP2(AND); 272 OP2(OR); 
273 OP2(XOR); 274 275 // special mathematical 276 OP2(DP2); 277 OP2(DP3); 278 OP2(DP4); 279 OP1(RSQ); 280 OP1_(LOG, LG2); 281 OP1_(EXP, EX2); 282 283 // rounding 284 OP1_(ROUND_NE, ROUND); 285 OP1_(ROUND_Z, TRUNC); 286 OP1_(ROUND_PI, CEIL); 287 OP1_(ROUND_NI, FLR); 288 289 // cross-thread 290 OP1_(DERIV_RTX, DDX); 291 OP1_(DERIV_RTX_COARSE, DDX); 292 OP1_(DERIV_RTX_FINE, DDX); 293 OP1_(DERIV_RTY, DDY); 294 OP1_(DERIV_RTY_COARSE, DDY); 295 OP1_(DERIV_RTY_FINE, DDY); 296 case SM4_OPCODE_EMIT: 297 ureg_EMIT(ureg); 298 break; 299 case SM4_OPCODE_CUT: 300 ureg_ENDPRIM(ureg); 301 break; 302 case SM4_OPCODE_EMITTHENCUT: 303 ureg_EMIT(ureg); 304 ureg_ENDPRIM(ureg); 305 break; 306 307 // non-trivial instructions 308 case SM4_OPCODE_MOVC: 309 /* CMP checks for < 0, but MOVC checks for != 0 310 * but fortunately, x != 0 is equivalent to -abs(x) < 0 311 * XXX: can test_nz apply to this?! 312 */ 313 ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3)); 314 break; 315 case SM4_OPCODE_SQRT: 316 { 317 struct ureg_dst d = _dst(); 318 struct ureg_dst t = _tmp(d); 319 ureg_RSQ(ureg, t, _src(1)); 320 ureg_RCP(ureg, d, ureg_src(t)); 321 break; 322 } 323 case SM4_OPCODE_SINCOS: 324 { 325 struct ureg_dst s = _dst(0); 326 struct ureg_dst c = _dst(1); 327 struct ureg_src v = _src(2); 328 if(s.File != TGSI_FILE_NULL) 329 ureg_SIN(ureg, s, v); 330 if(c.File != TGSI_FILE_NULL) 331 ureg_COS(ureg, c, v); 332 break; 333 } 334 335 // control flow 336 case SM4_OPCODE_DISCARD: 337 ureg_KIL(ureg, _src(0)); 338 break; 339 OP_CF(LOOP, BGNLOOP); 340 OP_CF(ENDLOOP, ENDLOOP); 341 case SM4_OPCODE_BREAK: 342 ureg_BRK(ureg); 343 break; 344 case SM4_OPCODE_BREAKC: 345 // XXX: can test_nz apply to this?! 346 ureg_BREAKC(ureg, _src(0)); 347 break; 348 case SM4_OPCODE_CONTINUE: 349 ureg_CONT(ureg); 350 break; 351 case SM4_OPCODE_CONTINUEC: 352 // XXX: can test_nz apply to this?! 
353 ureg_IF(ureg, _src(0), &label); 354 ureg_CONT(ureg); 355 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 356 ureg_ENDIF(ureg); 357 break; 358 case SM4_OPCODE_SWITCH: 359 ureg_SWITCH(ureg, _src(0)); 360 break; 361 case SM4_OPCODE_CASE: 362 ureg_CASE(ureg, _src(0)); 363 break; 364 case SM4_OPCODE_DEFAULT: 365 ureg_DEFAULT(ureg); 366 break; 367 case SM4_OPCODE_ENDSWITCH: 368 ureg_ENDSWITCH(ureg); 369 break; 370 case SM4_OPCODE_CALL: 371 ureg_CAL(ureg, &label); 372 label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)])); 373 break; 374 case SM4_OPCODE_LABEL: 375 if(in_sub) 376 ureg_ENDSUB(ureg); 377 else 378 ureg_END(ureg); 379 ureg_BGNSUB(ureg); 380 in_sub = true; 381 break; 382 case SM4_OPCODE_RET: 383 if(in_sub || insn_num != (program.insns.size() - 1)) 384 ureg_RET(ureg); 385 break; 386 case SM4_OPCODE_RETC: 387 ureg_IF(ureg, _src(0), &label); 388 if(insn->insn.test_nz) 389 ureg_RET(ureg); 390 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 391 if(!insn->insn.test_nz) 392 { 393 ureg_ELSE(ureg, &label); 394 ureg_RET(ureg); 395 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 396 } 397 ureg_ENDIF(ureg); 398 break; 399 OP_CF(ELSE, ELSE); 400 case SM4_OPCODE_ENDIF: 401 ureg_ENDIF(ureg); 402 break; 403 case SM4_OPCODE_IF: 404 if(insn->insn.test_nz) 405 { 406 ureg_IF(ureg, _src(0), &label); 407 label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); 408 } 409 else 410 { 411 unsigned linked = program.cf_insn_linked[insn_num]; 412 if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF) 413 { 414 ureg_IF(ureg, _src(0), &label); 415 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 416 ureg_ELSE(ureg, &label); 417 label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); 418 } 419 else 420 { 421 /* we have to swap the branches in this case (fun!) 422 * TODO: maybe just emit a SEQ 0? 
423 * */ 424 unsigned endif = program.cf_insn_linked[linked]; 425 426 ureg_IF(ureg, _src(0), &label); 427 label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); 428 429 translate_insns(linked + 1, endif); 430 431 sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg); 432 ureg_ELSE(ureg, &label); 433 label_to_sm4_insn_num.push_back(std::make_pair(label, endif)); 434 435 translate_insns(insn_num + 1, linked); 436 437 insn_num = endif - 1; 438 goto next; 439 } 440 } 441 break; 442 case SM4_OPCODE_RESINFO: 443 { 444 std::map<int, int>::iterator i; 445 i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2)); 446 check(i != program.resource_to_slot.end()); 447 unsigned texslot = i->second; 448 449 // no driver actually provides this, unfortunately 450 ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); 451 break; 452 }; 453 // TODO: sample offset, sample index 454 case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch) 455 case SM4_OPCODE_LD_MS: 456 { 457 unsigned texslot = _texslot(false); 458 unsigned dim = 0; 459 switch(targets[texslot].first) 460 { 461 case TGSI_TEXTURE_1D: 462 dim = 1; 463 break; 464 case TGSI_TEXTURE_2D: 465 case TGSI_TEXTURE_RECT: 466 dim = 2; 467 break; 468 case TGSI_TEXTURE_3D: 469 dim = 3; 470 break; 471 default: 472 check(0); 473 } 474 struct ureg_dst tmp = _tmp(); 475 if(avoid_txf) 476 { 477 struct ureg_src texcoord; 478 if(!avoid_int) 479 { 480 ureg_I2F(ureg, tmp, _src(1)); 481 texcoord = ureg_src(tmp); 482 } 483 else 484 texcoord = _src(1); 485 486 ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]); 487 } 488 else 489 ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]); 490 break; 491 } 492 case SM4_OPCODE_SAMPLE: // dst, coord, res, samp 493 { 494 unsigned texslot = _texslot(); 495 ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); 496 break; 497 
} 498 case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x 499 { 500 unsigned texslot = _texslot(); 501 struct ureg_dst tmp = _tmp(); 502 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); 503 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); 504 ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 505 break; 506 } 507 case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x 508 { 509 unsigned texslot = _texslot(); 510 struct ureg_dst tmp = _tmp(); 511 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); 512 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); 513 ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 514 break; 515 } 516 case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x 517 { 518 unsigned texslot = _texslot(); 519 struct ureg_dst tmp = _tmp(); 520 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); 521 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); 522 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0)); 523 ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 524 break; 525 } 526 case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy 527 { 528 unsigned texslot = _texslot(); 529 ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5)); 530 break; 531 } 532 case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x 533 { 534 unsigned texslot = _texslot(); 535 struct ureg_dst tmp = _tmp(); 536 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); 537 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); 538 ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 539 break; 540 } 541 default: 542 ok = false; 543 break; 544 } 545 546 if(!ok && !avoid_int) 547 { 548 ok 
= true; 549 switch(insn->opcode) 550 { 551 // integer 552 OP1_(ITOF, I2F); 553 OP1_(FTOI, F2I); 554 OP2_(IADD, UADD); 555 OP1(INEG); 556 OP2_(IMUL, UMUL); 557 OP3_(IMAD, UMAD); 558 OP2_(ISHL, SHL); 559 OP2_(ISHR, ISHR); 560 OP2(IMIN); 561 OP2(IMAX); 562 OP2_(ILT, ISLT); 563 OP2_(IGE, ISGE); 564 OP2_(IEQ, USEQ); 565 OP2_(INE, USNE); 566 567 // unsigned 568 OP1_(UTOF, U2F); 569 OP1_(FTOU, F2U); 570 OP2(UMUL); 571 OP3(UMAD); 572 OP2(UMIN); 573 OP2(UMAX); 574 OP2_(ULT, USLT); 575 OP2_(UGE, USGE); 576 OP2(USHR); 577 578 case SM4_OPCODE_UDIV: 579 { 580 struct ureg_dst q = _dst(0); 581 struct ureg_dst r = _dst(1); 582 struct ureg_src a = _src(2); 583 struct ureg_src b = _src(3); 584 if(q.File != TGSI_FILE_NULL) 585 ureg_UDIV(ureg, q, a, b); 586 if(r.File != TGSI_FILE_NULL) 587 ureg_UMOD(ureg, r, a, b); 588 break; 589 } 590 default: 591 ok = false; 592 } 593 } 594 595 if(!ok && avoid_int) 596 { 597 ok = true; 598 switch(insn->opcode) 599 { 600 case SM4_OPCODE_ITOF: 601 case SM4_OPCODE_UTOF: 602 break; 603 OP1_(FTOI, TRUNC); 604 OP1_(FTOU, FLR); 605 // integer 606 OP2_(IADD, ADD); 607 OP2_(IMUL, MUL); 608 OP3_(IMAD, MAD); 609 OP2_(MIN, MIN); 610 OP2_(MAX, MAX); 611 OP2_(ILT, SLT); 612 OP2_(IGE, SGE); 613 OP2_(IEQ, SEQ); 614 OP2_(INE, SNE); 615 616 // unsigned 617 OP2_(UMUL, MUL); 618 OP3_(UMAD, MAD); 619 OP2_(UMIN, MIN); 620 OP2_(UMAX, MAX); 621 OP2_(ULT, SLT); 622 OP2_(UGE, SGE); 623 624 case SM4_OPCODE_INEG: 625 ureg_MOV(ureg, _dst(), ureg_negate(_src(1))); 626 break; 627 case SM4_OPCODE_ISHL: 628 { 629 struct ureg_dst d = _dst(); 630 struct ureg_dst t = _tmp(d); 631 ureg_EX2(ureg, t, _src(2)); 632 ureg_MUL(ureg, d, ureg_src(t), _src(1)); 633 break; 634 } 635 case SM4_OPCODE_ISHR: 636 case SM4_OPCODE_USHR: 637 { 638 struct ureg_dst d = _dst(); 639 struct ureg_dst t = _tmp(d); 640 ureg_EX2(ureg, t, ureg_negate(_src(2))); 641 ureg_MUL(ureg, t, ureg_src(t), _src(1)); 642 ureg_FLR(ureg, d, ureg_src(t)); 643 break; 644 } 645 case SM4_OPCODE_UDIV: 646 { 647 struct ureg_dst q = 
_dst(0); 648 struct ureg_dst r = _dst(1); 649 struct ureg_src a = _src(2); 650 struct ureg_src b = _src(3); 651 struct ureg_dst f = _tmp(); 652 ureg_DIV(ureg, f, a, b); 653 if(q.File != TGSI_FILE_NULL) 654 ureg_FLR(ureg, q, ureg_src(f)); 655 if(r.File != TGSI_FILE_NULL) 656 { 657 ureg_FRC(ureg, f, ureg_src(f)); 658 ureg_MUL(ureg, r, ureg_src(f), b); 659 } 660 break; 661 } 662 default: 663 ok = false; 664 } 665 } 666 667 check(ok); 668 669 if(!insn_tmps.empty()) 670 { 671 for(unsigned i = 0; i < insn_tmps.size(); ++i) 672 ureg_release_temporary(ureg, insn_tmps[i]); 673 insn_tmps.clear(); 674 } 675next:; 676 } 677 } 678 679 void* do_translate() 680 { 681 unsigned processor; 682 switch(program.version.type) 683 { 684 case 0: 685 processor = TGSI_PROCESSOR_FRAGMENT; 686 break; 687 case 1: 688 processor = TGSI_PROCESSOR_VERTEX; 689 break; 690 case 2: 691 processor = TGSI_PROCESSOR_GEOMETRY; 692 break; 693 default: 694 fail("Tessellation and compute shaders not yet supported"); 695 return 0; 696 } 697 698 if(!sm4_link_cf_insns(program)) 699 fail("Malformed control flow"); 700 if(!sm4_find_labels(program)) 701 fail("Failed to locate labels"); 702 if(!sm4_allocate_resource_sampler_pairs(program)) 703 fail("Unsupported (indirect?) 
accesses to resources and/or samplers"); 704 705 ureg = ureg_create(processor); 706 707 in_sub = false; 708 709 for(unsigned i = 0; i < program.slot_to_resource.size(); ++i) 710 samplers.push_back(ureg_DECL_sampler(ureg, i)); 711 712 sm4_to_tgsi_insn_num.resize(program.insns.size()); 713 for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num) 714 { 715 sm4_dcl& dcl = *program.dcls[insn_num]; 716 int idx = -1; 717 if(dcl.op.get() && dcl.op->is_index_simple(0)) 718 idx = dcl.op->indices[0].disp; 719 switch(dcl.opcode) 720 { 721 case SM4_OPCODE_DCL_GLOBAL_FLAGS: 722 break; 723 case SM4_OPCODE_DCL_TEMPS: 724 for(unsigned i = 0; i < dcl.num; ++i) 725 temps.push_back(ureg_DECL_temporary(ureg)); 726 break; 727 case SM4_OPCODE_DCL_INPUT: 728 check(idx >= 0); 729 if(processor == TGSI_PROCESSOR_VERTEX) 730 { 731 if(inputs.size() <= (unsigned)idx) 732 inputs.resize(idx + 1); 733 inputs[idx] = ureg_DECL_vs_input(ureg, idx); 734 } 735 else if(processor == TGSI_PROCESSOR_GEOMETRY) 736 { 737 // TODO: is this correct? 
738 unsigned gsidx = dcl.op->indices[1].disp; 739 if(inputs.size() <= (unsigned)gsidx) 740 inputs.resize(gsidx + 1); 741 inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx); 742 } 743 else 744 check(0); 745 break; 746 case SM4_OPCODE_DCL_INPUT_PS: 747 check(idx >= 0); 748 if(inputs.size() <= (unsigned)idx) 749 inputs.resize(idx + 1); 750 inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid); 751 break; 752 case SM4_OPCODE_DCL_OUTPUT: 753 check(idx >= 0); 754 if(outputs.size() <= (unsigned)idx) 755 outputs.resize(idx + 1); 756 if(processor == TGSI_PROCESSOR_FRAGMENT) 757 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx); 758 else 759 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx); 760 break; 761 case SM4_OPCODE_DCL_INPUT_SIV: 762 case SM4_OPCODE_DCL_INPUT_SGV: 763 case SM4_OPCODE_DCL_INPUT_PS_SIV: 764 case SM4_OPCODE_DCL_INPUT_PS_SGV: 765 check(idx >= 0); 766 if(inputs.size() <= (unsigned)idx) 767 inputs.resize(idx + 1); 768 // TODO: is this correct? 
769 inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0); 770 break; 771 case SM4_OPCODE_DCL_OUTPUT_SIV: 772 case SM4_OPCODE_DCL_OUTPUT_SGV: 773 check(idx >= 0); 774 if(outputs.size() <= (unsigned)idx) 775 outputs.resize(idx + 1); 776 check(sm4_to_pipe_sv[dcl.sv] >= 0); 777 outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0); 778 break; 779 case SM4_OPCODE_DCL_RESOURCE: 780 check(idx >= 0); 781 if(targets.size() <= (unsigned)idx) 782 targets.resize(idx + 1); 783 switch(dcl.dcl_resource.target) 784 { 785 case SM4_TARGET_TEXTURE1D: 786 targets[idx].first = TGSI_TEXTURE_1D; 787 targets[idx].second = TGSI_TEXTURE_SHADOW1D; 788 break; 789 case SM4_TARGET_TEXTURE2D: 790 targets[idx].first = TGSI_TEXTURE_2D; 791 targets[idx].second = TGSI_TEXTURE_SHADOW2D; 792 break; 793 case SM4_TARGET_TEXTURE3D: 794 targets[idx].first = TGSI_TEXTURE_3D; 795 targets[idx].second = 0; 796 break; 797 case SM4_TARGET_TEXTURECUBE: 798 targets[idx].first = TGSI_TEXTURE_CUBE; 799 targets[idx].second = 0; 800 break; 801 default: 802 // HACK to make SimpleSample10 work 803 //check(0); 804 targets[idx].first = TGSI_TEXTURE_2D; 805 targets[idx].second = TGSI_TEXTURE_SHADOW2D; 806 break; 807 } 808 break; 809 case SM4_OPCODE_DCL_SAMPLER: 810 check(idx >= 0); 811 if(sampler_modes.size() <= (unsigned)idx) 812 sampler_modes.resize(idx + 1); 813 check(!dcl.dcl_sampler.mono); 814 sampler_modes[idx] = dcl.dcl_sampler.shadow; 815 break; 816 case SM4_OPCODE_DCL_CONSTANT_BUFFER: 817 check(dcl.op->num_indices == 2); 818 check(dcl.op->is_index_simple(0)); 819 check(dcl.op->is_index_simple(1)); 820 idx = dcl.op->indices[0].disp; 821 ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx); 822 break; 823 case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: 824 ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]); 825 break; 826 case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: 827 ureg_property_gs_output_prim(ureg, 
d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]); 828 break; 829 case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: 830 ureg_property_gs_max_vertices(ureg, dcl.num); 831 break; 832 default: 833 check(0); 834 } 835 } 836 837 translate_insns(0, program.insns.size()); 838 sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg)); 839 if(in_sub) 840 ureg_ENDSUB(ureg); 841 else 842 ureg_END(ureg); 843 844 for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i) 845 ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]); 846 847 const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0); 848 ureg_destroy(ureg); 849 return (void*)tokens; 850 } 851 852 void* translate() 853 { 854 try 855 { 856 return do_translate(); 857 } 858 catch(const char*) 859 { 860 return 0; 861 } 862 } 863}; 864 865void* sm4_to_tgsi(struct sm4_program& program) 866{ 867 sm4_to_tgsi_converter conv(program); 868 return conv.translate(); 869} 870