// sm4_to_tgsi.cpp revision 6c598c78bd17642d731cf57b8369cc794f64ba2f
1/************************************************************************** 2 * 3 * Copyright 2010 Luca Barbieri 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 * 25 **************************************************************************/ 26 27#include "sm4.h" 28#include "tgsi/tgsi_ureg.h" 29#include <vector> 30 31#if 1 32#define check(x) assert(x) 33#define fail(x) assert(0 && (x)) 34#else 35#define check(x) do {if(!(x)) throw(#x);} while(0) 36#define fail(x) throw(x) 37#endif 38 39static unsigned sm4_to_pipe_interpolation[] = 40{ 41 TGSI_INTERPOLATE_PERSPECTIVE, /* UNDEFINED */ 42 TGSI_INTERPOLATE_CONSTANT, 43 TGSI_INTERPOLATE_PERSPECTIVE, /* LINEAR */ 44 TGSI_INTERPOLATE_PERSPECTIVE, /* LINEAR_CENTROID */ 45 TGSI_INTERPOLATE_LINEAR, /* LINEAR_NOPERSPECTIVE */ 46 TGSI_INTERPOLATE_LINEAR, /* LINEAR_NOPERSPECTIVE_CENTROID */ 47 48 // Added in D3D10.1 49 TGSI_INTERPOLATE_PERSPECTIVE, /* LINEAR_SAMPLE */ 50 TGSI_INTERPOLATE_LINEAR, /* LINEAR_NOPERSPECTIVE_SAMPLE */ 51}; 52 53static int sm4_to_pipe_sv[] = 54{ 55 -1, 56 TGSI_SEMANTIC_POSITION, 57 -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */ 58 -1, /*TGSI_SEMANTIC_CULL_DISTANCE */ 59 -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */ 60 -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */ 61 -1, /*TGSI_SEMANTIC_VERTEXID,*/ 62 TGSI_SEMANTIC_PRIMID, 63 TGSI_SEMANTIC_INSTANCEID, 64 TGSI_SEMANTIC_FACE, 65 -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/ 66}; 67 68struct sm4_to_tgsi_converter 69{ 70 struct ureg_program* ureg; 71 std::vector<struct ureg_dst> temps; 72 std::vector<struct ureg_dst> outputs; 73 std::vector<struct ureg_src> inputs; 74 std::vector<struct ureg_src> samplers; 75 std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison 76 std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison 77 std::vector<std::pair<unsigned, unsigned> > loops; 78 sm4_insn* insn; 79 struct sm4_program& program; 80 std::vector<unsigned> sm4_to_tgsi_insn_num; 81 std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num; 82 bool in_sub; 83 bool avoid_txf; 84 bool avoid_int; 85 86 sm4_to_tgsi_converter(struct sm4_program& program) 87 : program(program) 
88 { 89 avoid_txf = true; 90 avoid_int = false; 91 } 92 93 struct ureg_dst _reg(sm4_op& op) 94 { 95 switch(op.file) 96 { 97 case SM4_FILE_NULL: 98 { 99 struct ureg_dst d; 100 memset(&d, 0, sizeof(d)); 101 d.File = TGSI_FILE_NULL; 102 return d; 103 } 104 case SM4_FILE_TEMP: 105 check(op.has_simple_index()); 106 check(op.indices[0].disp < temps.size()); 107 return temps[op.indices[0].disp]; 108 case SM4_FILE_OUTPUT: 109 check(op.has_simple_index()); 110 check(op.indices[0].disp < outputs.size()); 111 return outputs[op.indices[0].disp]; 112 default: 113 check(0); 114 return ureg_dst_undef(); 115 } 116 } 117 118 struct ureg_dst _dst(unsigned i = 0) 119 { 120 check(i < insn->num_ops); 121 sm4_op& op = *insn->ops[i]; 122 check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR); 123 struct ureg_dst d = ureg_writemask(_reg(op), op.mask); 124 if(insn->insn.sat) 125 d = ureg_saturate(d); 126 return d; 127 } 128 129 struct ureg_src _src(unsigned i) 130 { 131 check(i < insn->num_ops); 132 sm4_op& op = *insn->ops[i]; 133 struct ureg_src s; 134 switch(op.file) 135 { 136 case SM4_FILE_IMMEDIATE32: 137 s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32); 138 break; 139 case SM4_FILE_INPUT: 140 check(op.has_simple_index()); 141 check(op.indices[0].disp < inputs.size()); 142 s = inputs[op.indices[0].disp]; 143 break; 144 case SM4_FILE_CONSTANT_BUFFER: 145 // TODO: indirect addressing 146 check(op.num_indices == 2); 147 check(op.is_index_simple(0)); 148 check(op.is_index_simple(1)); 149 s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp); 150 s.Dimension = 1; 151 s.DimensionIndex = op.indices[0].disp; 152 break; 153 default: 154 s = ureg_src(_reg(op)); 155 break; 156 } 157 if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR) 158 s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]); 159 else 160 { 161 /* immediates are masked to show 
needed values */ 162 check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64); 163 } 164 if(op.abs) 165 s = ureg_abs(s); 166 if(op.neg) 167 s = ureg_negate(s); 168 return s; 169 }; 170 171 int _idx(sm4_file file, unsigned i = 0) 172 { 173 check(i < insn->num_ops); 174 sm4_op& op = *insn->ops[i]; 175 check(op.file == file); 176 check(op.has_simple_index()); 177 return (int)op.indices[0].disp; 178 } 179 180 int _texslot(bool have_sampler = true) 181 { 182 std::map<std::pair<int, int>, int>::iterator i; 183 i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1)); 184 check(i != program.resource_sampler_to_slot.end()); 185 return i->second; 186 } 187 188 unsigned tex_target(unsigned texslot) 189 { 190 unsigned mode = sampler_modes[program.slot_to_sampler[texslot]]; 191 unsigned target; 192 if(mode) 193 target = targets[program.slot_to_resource[texslot]].second; 194 else 195 target = targets[program.slot_to_resource[texslot]].first; 196 check(target); 197 return target; 198 } 199 200 std::vector<struct ureg_dst> insn_tmps; 201 202 struct ureg_dst _tmp() 203 { 204 struct ureg_dst t = ureg_DECL_temporary(ureg); 205 insn_tmps.push_back(t); 206 return t; 207 } 208 209 struct ureg_dst _tmp(struct ureg_dst d) 210 { 211 if(d.File == TGSI_FILE_TEMPORARY) 212 return d; 213 else 214 return ureg_writemask(_tmp(), d.WriteMask); 215 } 216 217#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break 218#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break 219#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break 220#define OP1(n) OP1_(n, n) 221#define OP2(n) OP2_(n, n) 222#define OP3(n) OP3_(n, n) 223#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break; 224 225 void translate_insns(unsigned 
begin, unsigned end) 226 { 227 for(unsigned insn_num = begin; insn_num < end; ++insn_num) 228 { 229 sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg); 230 unsigned label; 231 insn = program.insns[insn_num]; 232 bool ok; 233 ok = true; 234 switch(insn->opcode) 235 { 236 // trivial instructions 237 case SM4_OPCODE_NOP: 238 break; 239 OP1(MOV); 240 241 // float 242 OP2(ADD); 243 OP2(MUL); 244 OP3(MAD); 245 OP2(DIV); 246 OP1(FRC); 247 OP1(RCP); 248 OP2(MIN); 249 OP2(MAX); 250 OP2_(LT, SLT); 251 OP2_(GE, SGE); 252 OP2_(EQ, SEQ); 253 OP2_(NE, SNE); 254 255 // bitwise 256 OP1(NOT); 257 OP2(AND); 258 OP2(OR); 259 OP2(XOR); 260 261 // special mathematical 262 OP2(DP2); 263 OP2(DP3); 264 OP2(DP4); 265 OP1(RSQ); 266 OP1_(LOG, LG2); 267 OP1_(EXP, EX2); 268 269 // rounding 270 OP1_(ROUND_NE, ROUND); 271 OP1_(ROUND_Z, TRUNC); 272 OP1_(ROUND_PI, CEIL); 273 OP1_(ROUND_NI, FLR); 274 275 // cross-thread 276 OP1_(DERIV_RTX, DDX); 277 OP1_(DERIV_RTX_COARSE, DDX); 278 OP1_(DERIV_RTX_FINE, DDX); 279 OP1_(DERIV_RTY, DDY); 280 OP1_(DERIV_RTY_COARSE, DDY); 281 OP1_(DERIV_RTY_FINE, DDY); 282 case SM4_OPCODE_EMIT: 283 ureg_EMIT(ureg); 284 break; 285 case SM4_OPCODE_CUT: 286 ureg_ENDPRIM(ureg); 287 break; 288 case SM4_OPCODE_EMITTHENCUT: 289 ureg_EMIT(ureg); 290 ureg_ENDPRIM(ureg); 291 break; 292 293 // non-trivial instructions 294 case SM4_OPCODE_MOVC: 295 /* CMP checks for < 0, but MOVC checks for != 0 296 * but fortunately, x != 0 is equivalent to -abs(x) < 0 297 * XXX: can test_nz apply to this?! 
298 */ 299 ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3)); 300 break; 301 case SM4_OPCODE_SQRT: 302 { 303 struct ureg_dst d = _dst(); 304 struct ureg_dst t = _tmp(d); 305 ureg_RSQ(ureg, t, _src(1)); 306 ureg_RCP(ureg, d, ureg_src(t)); 307 break; 308 } 309 case SM4_OPCODE_SINCOS: 310 { 311 struct ureg_dst s = _dst(0); 312 struct ureg_dst c = _dst(1); 313 struct ureg_src v = _src(2); 314 if(s.File != TGSI_FILE_NULL) 315 ureg_SIN(ureg, s, v); 316 if(c.File != TGSI_FILE_NULL) 317 ureg_COS(ureg, c, v); 318 break; 319 } 320 321 // control flow 322 case SM4_OPCODE_DISCARD: 323 ureg_KIL(ureg, _src(0)); 324 break; 325 OP_CF(LOOP, BGNLOOP); 326 OP_CF(ENDLOOP, ENDLOOP); 327 case SM4_OPCODE_BREAK: 328 ureg_BRK(ureg); 329 break; 330 case SM4_OPCODE_BREAKC: 331 // XXX: can test_nz apply to this?! 332 ureg_BREAKC(ureg, _src(0)); 333 break; 334 case SM4_OPCODE_CONTINUE: 335 ureg_CONT(ureg); 336 break; 337 case SM4_OPCODE_CONTINUEC: 338 // XXX: can test_nz apply to this?! 339 ureg_IF(ureg, _src(0), &label); 340 ureg_CONT(ureg); 341 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 342 ureg_ENDIF(ureg); 343 break; 344 case SM4_OPCODE_SWITCH: 345 ureg_SWITCH(ureg, _src(0)); 346 break; 347 case SM4_OPCODE_CASE: 348 ureg_CASE(ureg, _src(0)); 349 break; 350 case SM4_OPCODE_DEFAULT: 351 ureg_DEFAULT(ureg); 352 break; 353 case SM4_OPCODE_ENDSWITCH: 354 ureg_ENDSWITCH(ureg); 355 break; 356 case SM4_OPCODE_CALL: 357 ureg_CAL(ureg, &label); 358 label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)])); 359 break; 360 case SM4_OPCODE_LABEL: 361 if(in_sub) 362 ureg_ENDSUB(ureg); 363 else 364 ureg_END(ureg); 365 ureg_BGNSUB(ureg); 366 in_sub = true; 367 break; 368 case SM4_OPCODE_RET: 369 if(in_sub || insn_num != (program.insns.size() - 1)) 370 ureg_RET(ureg); 371 break; 372 case SM4_OPCODE_RETC: 373 ureg_IF(ureg, _src(0), &label); 374 if(insn->insn.test_nz) 375 ureg_RET(ureg); 376 ureg_fixup_label(ureg, 
label, ureg_get_instruction_number(ureg)); 377 if(!insn->insn.test_nz) 378 { 379 ureg_ELSE(ureg, &label); 380 ureg_RET(ureg); 381 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 382 } 383 ureg_ENDIF(ureg); 384 break; 385 OP_CF(ELSE, ELSE); 386 case SM4_OPCODE_ENDIF: 387 ureg_ENDIF(ureg); 388 break; 389 case SM4_OPCODE_IF: 390 if(insn->insn.test_nz) 391 { 392 ureg_IF(ureg, _src(0), &label); 393 label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); 394 } 395 else 396 { 397 unsigned linked = program.cf_insn_linked[insn_num]; 398 if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF) 399 { 400 ureg_IF(ureg, _src(0), &label); 401 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 402 ureg_ELSE(ureg, &label); 403 label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); 404 } 405 else 406 { 407 /* we have to swap the branches in this case (fun!) 408 * TODO: maybe just emit a SEQ 0? 409 * */ 410 unsigned endif = program.cf_insn_linked[linked]; 411 412 ureg_IF(ureg, _src(0), &label); 413 label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); 414 415 translate_insns(linked + 1, endif); 416 417 sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg); 418 ureg_ELSE(ureg, &label); 419 label_to_sm4_insn_num.push_back(std::make_pair(label, endif)); 420 421 translate_insns(insn_num + 1, linked); 422 423 insn_num = endif - 1; 424 goto next; 425 } 426 } 427 break; 428 case SM4_OPCODE_RESINFO: 429 { 430 std::map<int, int>::iterator i; 431 i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2)); 432 check(i != program.resource_to_slot.end()); 433 unsigned texslot = i->second; 434 435 // no driver actually provides this, unfortunately 436 ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); 437 break; 438 }; 439 // TODO: sample offset, sample index 440 case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch) 441 case SM4_OPCODE_LD_MS: 442 { 
443 unsigned texslot = _texslot(false); 444 unsigned dim = 0; 445 switch(targets[texslot].first) 446 { 447 case TGSI_TEXTURE_1D: 448 dim = 1; 449 break; 450 case TGSI_TEXTURE_2D: 451 case TGSI_TEXTURE_RECT: 452 dim = 2; 453 break; 454 case TGSI_TEXTURE_3D: 455 dim = 3; 456 break; 457 default: 458 check(0); 459 } 460 struct ureg_dst tmp = _tmp(); 461 if(avoid_txf) 462 { 463 struct ureg_src texcoord; 464 if(!avoid_int) 465 { 466 ureg_I2F(ureg, tmp, _src(1)); 467 texcoord = ureg_src(tmp); 468 } 469 else 470 texcoord = _src(1); 471 472 ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]); 473 } 474 else 475 ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]); 476 break; 477 } 478 case SM4_OPCODE_SAMPLE: // dst, coord, res, samp 479 { 480 unsigned texslot = _texslot(); 481 ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); 482 break; 483 } 484 case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x 485 { 486 unsigned texslot = _texslot(); 487 struct ureg_dst tmp = _tmp(); 488 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); 489 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); 490 ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 491 break; 492 } 493 case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x 494 { 495 unsigned texslot = _texslot(); 496 struct ureg_dst tmp = _tmp(); 497 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); 498 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); 499 ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 500 break; 501 } 502 case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x 503 { 504 unsigned texslot = _texslot(); 505 struct ureg_dst tmp = _tmp(); 506 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); 507 ureg_MOV(ureg, 
ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); 508 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0)); 509 ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 510 break; 511 } 512 case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy 513 { 514 unsigned texslot = _texslot(); 515 ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5)); 516 break; 517 } 518 case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x 519 { 520 unsigned texslot = _texslot(); 521 struct ureg_dst tmp = _tmp(); 522 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); 523 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); 524 ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); 525 break; 526 } 527 default: 528 ok = false; 529 break; 530 } 531 532 if(!ok && !avoid_int) 533 { 534 ok = true; 535 switch(insn->opcode) 536 { 537 // integer 538 OP1_(ITOF, I2F); 539 OP1_(FTOI, F2I); 540 OP2_(IADD, UADD); 541 OP1(INEG); 542 OP2_(IMUL, UMUL); 543 OP3_(IMAD, UMAD); 544 OP2_(ISHL, SHL); 545 OP2_(ISHR, ISHR); 546 OP2(IMIN); 547 OP2(IMAX); 548 OP2_(ILT, ISLT); 549 OP2_(IGE, ISGE); 550 OP2_(IEQ, USEQ); 551 OP2_(INE, USNE); 552 553 // unsigned 554 OP1_(UTOF, U2F); 555 OP1_(FTOU, F2U); 556 OP2(UMUL); 557 OP3(UMAD); 558 OP2(UMIN); 559 OP2(UMAX); 560 OP2_(ULT, USLT); 561 OP2_(UGE, USGE); 562 OP2(USHR); 563 564 case SM4_OPCODE_UDIV: 565 { 566 struct ureg_dst q = _dst(0); 567 struct ureg_dst r = _dst(1); 568 struct ureg_src a = _src(2); 569 struct ureg_src b = _src(3); 570 if(q.File != TGSI_FILE_NULL) 571 ureg_UDIV(ureg, q, a, b); 572 if(r.File != TGSI_FILE_NULL) 573 ureg_UMOD(ureg, r, a, b); 574 break; 575 } 576 default: 577 ok = false; 578 } 579 } 580 581 if(!ok && avoid_int) 582 { 583 ok = true; 584 switch(insn->opcode) 585 { 586 case SM4_OPCODE_ITOF: 587 case SM4_OPCODE_UTOF: 588 break; 589 OP1_(FTOI, TRUNC); 
590 OP1_(FTOU, FLR); 591 // integer 592 OP2_(IADD, ADD); 593 OP2_(IMUL, MUL); 594 OP3_(IMAD, MAD); 595 OP2_(MIN, MIN); 596 OP2_(MAX, MAX); 597 OP2_(ILT, SLT); 598 OP2_(IGE, SGE); 599 OP2_(IEQ, SEQ); 600 OP2_(INE, SNE); 601 602 // unsigned 603 OP2_(UMUL, MUL); 604 OP3_(UMAD, MAD); 605 OP2_(UMIN, MIN); 606 OP2_(UMAX, MAX); 607 OP2_(ULT, SLT); 608 OP2_(UGE, SGE); 609 610 case SM4_OPCODE_INEG: 611 ureg_MOV(ureg, _dst(), ureg_negate(_src(1))); 612 break; 613 case SM4_OPCODE_ISHL: 614 { 615 struct ureg_dst d = _dst(); 616 struct ureg_dst t = _tmp(d); 617 ureg_EX2(ureg, t, _src(2)); 618 ureg_MUL(ureg, d, ureg_src(t), _src(1)); 619 break; 620 } 621 case SM4_OPCODE_ISHR: 622 case SM4_OPCODE_USHR: 623 { 624 struct ureg_dst d = _dst(); 625 struct ureg_dst t = _tmp(d); 626 ureg_EX2(ureg, t, ureg_negate(_src(2))); 627 ureg_MUL(ureg, t, ureg_src(t), _src(1)); 628 ureg_FLR(ureg, d, ureg_src(t)); 629 break; 630 } 631 case SM4_OPCODE_UDIV: 632 { 633 struct ureg_dst q = _dst(0); 634 struct ureg_dst r = _dst(1); 635 struct ureg_src a = _src(2); 636 struct ureg_src b = _src(3); 637 struct ureg_dst f = _tmp(); 638 ureg_DIV(ureg, f, a, b); 639 if(q.File != TGSI_FILE_NULL) 640 ureg_FLR(ureg, q, ureg_src(f)); 641 if(r.File != TGSI_FILE_NULL) 642 { 643 ureg_FRC(ureg, f, ureg_src(f)); 644 ureg_MUL(ureg, r, ureg_src(f), b); 645 } 646 break; 647 } 648 default: 649 ok = false; 650 } 651 } 652 653 check(ok); 654 655 if(!insn_tmps.empty()) 656 { 657 for(unsigned i = 0; i < insn_tmps.size(); ++i) 658 ureg_release_temporary(ureg, insn_tmps[i]); 659 insn_tmps.clear(); 660 } 661next:; 662 } 663 } 664 665 void* do_translate() 666 { 667 unsigned processor; 668 switch(program.version.type) 669 { 670 case 0: 671 processor = TGSI_PROCESSOR_FRAGMENT; 672 break; 673 case 1: 674 processor = TGSI_PROCESSOR_VERTEX; 675 break; 676 case 2: 677 processor = TGSI_PROCESSOR_GEOMETRY; 678 break; 679 default: 680 fail("Tessellation and compute shaders not yet supported"); 681 return 0; 682 } 683 684 
if(!sm4_link_cf_insns(program)) 685 fail("Malformed control flow"); 686 if(!sm4_find_labels(program)) 687 fail("Failed to locate labels"); 688 if(!sm4_allocate_resource_sampler_pairs(program)) 689 fail("Unsupported (indirect?) accesses to resources and/or samplers"); 690 691 ureg = ureg_create(processor); 692 693 in_sub = false; 694 695 for(unsigned i = 0; i < program.slot_to_resource.size(); ++i) 696 samplers.push_back(ureg_DECL_sampler(ureg, i)); 697 698 sm4_to_tgsi_insn_num.resize(program.insns.size()); 699 for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num) 700 { 701 sm4_dcl& dcl = *program.dcls[insn_num]; 702 int idx = -1; 703 if(dcl.op.get() && dcl.op->has_simple_index()) 704 idx = dcl.op->indices[0].disp; 705 switch(dcl.opcode) 706 { 707 case SM4_OPCODE_DCL_GLOBAL_FLAGS: 708 break; 709 case SM4_OPCODE_DCL_TEMPS: 710 for(unsigned i = 0; i < dcl.num; ++i) 711 temps.push_back(ureg_DECL_temporary(ureg)); 712 break; 713 case SM4_OPCODE_DCL_INPUT: 714 check(idx >= 0); 715 if(inputs.size() <= (unsigned)idx) 716 inputs.resize(idx + 1); 717 if(processor == TGSI_PROCESSOR_VERTEX) 718 inputs[idx] = ureg_DECL_vs_input(ureg, idx); 719 else 720 check(0); 721 break; 722 case SM4_OPCODE_DCL_INPUT_PS: 723 check(idx >= 0); 724 if(inputs.size() <= (unsigned)idx) 725 inputs.resize(idx + 1); 726 inputs[idx] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation]); 727 break; 728 case SM4_OPCODE_DCL_OUTPUT: 729 check(idx >= 0); 730 if(outputs.size() <= (unsigned)idx) 731 outputs.resize(idx + 1); 732 if(processor == TGSI_PROCESSOR_FRAGMENT) 733 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx); 734 else 735 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx); 736 break; 737 case SM4_OPCODE_DCL_INPUT_SIV: 738 case SM4_OPCODE_DCL_INPUT_SGV: 739 case SM4_OPCODE_DCL_INPUT_PS_SIV: 740 case SM4_OPCODE_DCL_INPUT_PS_SGV: 741 check(idx >= 0); 742 if(inputs.size() <= (unsigned)idx) 743 
inputs.resize(idx + 1); 744 // TODO: is this correct? 745 inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0); 746 break; 747 case SM4_OPCODE_DCL_OUTPUT_SIV: 748 case SM4_OPCODE_DCL_OUTPUT_SGV: 749 check(idx >= 0); 750 if(outputs.size() <= (unsigned)idx) 751 outputs.resize(idx + 1); 752 check(sm4_to_pipe_sv[dcl.sv] >= 0); 753 outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0); 754 break; 755 case SM4_OPCODE_DCL_RESOURCE: 756 check(idx >= 0); 757 if(targets.size() <= (unsigned)idx) 758 targets.resize(idx + 1); 759 switch(dcl.dcl_resource.target) 760 { 761 case SM4_TARGET_TEXTURE1D: 762 targets[idx].first = TGSI_TEXTURE_1D; 763 targets[idx].second = TGSI_TEXTURE_SHADOW1D; 764 break; 765 case SM4_TARGET_TEXTURE2D: 766 targets[idx].first = TGSI_TEXTURE_2D; 767 targets[idx].second = TGSI_TEXTURE_SHADOW2D; 768 break; 769 case SM4_TARGET_TEXTURE3D: 770 targets[idx].first = TGSI_TEXTURE_3D; 771 targets[idx].second = 0; 772 break; 773 case SM4_TARGET_TEXTURECUBE: 774 targets[idx].first = TGSI_TEXTURE_CUBE; 775 targets[idx].second = 0; 776 break; 777 default: 778 check(0); 779 } 780 break; 781 case SM4_OPCODE_DCL_SAMPLER: 782 check(idx >= 0); 783 if(sampler_modes.size() <= (unsigned)idx) 784 sampler_modes.resize(idx + 1); 785 check(!dcl.dcl_sampler.mono); 786 sampler_modes[idx] = dcl.dcl_sampler.shadow; 787 break; 788 case SM4_OPCODE_DCL_CONSTANT_BUFFER: 789 check(dcl.op->num_indices == 2); 790 check(dcl.op->is_index_simple(0)); 791 check(dcl.op->is_index_simple(1)); 792 idx = dcl.op->indices[0].disp; 793 ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx); 794 break; 795 default: 796 check(0); 797 } 798 } 799 800 translate_insns(0, program.insns.size()); 801 sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg)); 802 if(in_sub) 803 ureg_ENDSUB(ureg); 804 else 805 ureg_END(ureg); 806 807 for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i) 808 ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, 
sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]); 809 810 const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0); 811 ureg_destroy(ureg); 812 return (void*)tokens; 813 } 814 815 void* translate() 816 { 817 try 818 { 819 return do_translate(); 820 } 821 catch(const char*) 822 { 823 return 0; 824 } 825 } 826}; 827 828void* sm4_to_tgsi(struct sm4_program& program) 829{ 830 sm4_to_tgsi_converter conv(program); 831 return conv.translate(); 832} 833