st_mesa_to_tgsi.c revision 337d3b2b391c50130fa825ee853a570e1a84d309
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * \author 30 * Michal Krol, 31 * Keith Whitwell 32 */ 33 34#include "pipe/p_compiler.h" 35#include "pipe/p_shader_tokens.h" 36#include "pipe/p_state.h" 37#include "pipe/p_context.h" 38#include "tgsi/tgsi_ureg.h" 39#include "st_mesa_to_tgsi.h" 40#include "st_context.h" 41#include "shader/prog_instruction.h" 42#include "shader/prog_parameter.h" 43#include "util/u_debug.h" 44#include "util/u_math.h" 45#include "util/u_memory.h" 46 47struct label { 48 unsigned branch_target; 49 unsigned token; 50}; 51 52 53/** 54 * Intermediate state used during shader translation. 55 */ 56struct st_translate { 57 struct ureg_program *ureg; 58 59 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 60 struct ureg_src *constants; 61 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 62 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 63 struct ureg_dst address[1]; 64 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 65 66 const GLuint *inputMapping; 67 const GLuint *outputMapping; 68 69 /* For every instruction that contains a label (eg CALL), keep 70 * details so that we can go back afterwards and emit the correct 71 * tgsi instruction number for each label. 72 */ 73 struct label *labels; 74 unsigned labels_size; 75 unsigned labels_count; 76 77 /* Keep a record of the tgsi instruction number that each mesa 78 * instruction starts at, will be used to fix up labels after 79 * translation. 80 */ 81 unsigned *insn; 82 unsigned insn_size; 83 unsigned insn_count; 84 85 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 86 87 boolean error; 88}; 89 90 91static unsigned *get_label( struct st_translate *t, 92 unsigned branch_target ) 93{ 94 unsigned i; 95 96 if (t->labels_count + 1 >= t->labels_size) { 97 unsigned old_size = t->labels_size; 98 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 99 t->labels = REALLOC( t->labels, 100 old_size * sizeof t->labels[0], 101 t->labels_size * sizeof t->labels[0] ); 102 if (t->labels == NULL) { 103 static unsigned dummy; 104 t->error = TRUE; 105 return &dummy; 106 } 107 } 108 109 i = t->labels_count++; 110 t->labels[i].branch_target = branch_target; 111 return &t->labels[i].token; 112} 113 114 115static void set_insn_start( struct st_translate *t, 116 unsigned start ) 117{ 118 if (t->insn_count + 1 >= t->insn_size) { 119 unsigned old_size = t->insn_size; 120 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 121 t->insn = REALLOC( t->insn, 122 old_size * sizeof t->insn[0], 123 t->insn_size * sizeof t->insn[0] ); 124 if (t->insn == NULL) { 125 t->error = TRUE; 126 return; 127 } 128 } 129 130 t->insn[t->insn_count++] = start; 131} 132 133 134/* 135 * Map mesa register file to TGSI register file. 136 */ 137static struct ureg_dst 138dst_register( struct st_translate *t, 139 gl_register_file file, 140 GLuint index ) 141{ 142 switch( file ) { 143 case PROGRAM_UNDEFINED: 144 return ureg_dst_undef(); 145 146 case PROGRAM_TEMPORARY: 147 if (ureg_dst_is_undef(t->temps[index])) 148 t->temps[index] = ureg_DECL_temporary( t->ureg ); 149 150 return t->temps[index]; 151 152 case PROGRAM_OUTPUT: 153 return t->outputs[t->outputMapping[index]]; 154 155 case PROGRAM_ADDRESS: 156 return t->address[index]; 157 158 default: 159 debug_assert( 0 ); 160 return ureg_dst_undef(); 161 } 162} 163 164 165static struct ureg_src 166src_register( struct st_translate *t, 167 gl_register_file file, 168 GLint index ) 169{ 170 switch( file ) { 171 case PROGRAM_UNDEFINED: 172 return ureg_src_undef(); 173 174 case PROGRAM_TEMPORARY: 175 ASSERT(index >= 0); 176 if (ureg_dst_is_undef(t->temps[index])) 177 t->temps[index] = ureg_DECL_temporary( t->ureg ); 178 return ureg_src(t->temps[index]); 179 180 case PROGRAM_NAMED_PARAM: 181 case PROGRAM_ENV_PARAM: 182 case PROGRAM_LOCAL_PARAM: 183 case PROGRAM_UNIFORM: 184 ASSERT(index >= 0); 185 return t->constants[index]; 186 case PROGRAM_STATE_VAR: 187 case PROGRAM_CONSTANT: /* ie, immediate */ 188 if (index < 0) 189 return ureg_DECL_constant( t->ureg, 0 ); 190 else 191 return t->constants[index]; 192 193 case PROGRAM_INPUT: 194 return t->inputs[t->inputMapping[index]]; 195 196 case PROGRAM_OUTPUT: 197 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 198 199 case PROGRAM_ADDRESS: 200 return ureg_src(t->address[index]); 201 202 default: 203 debug_assert( 0 ); 204 return ureg_src_undef(); 205 } 206} 207 208 209/** 210 * Map mesa texture target to TGSI texture target. 211 */ 212static unsigned 213translate_texture_target( GLuint textarget, 214 GLboolean shadow ) 215{ 216 if (shadow) { 217 switch( textarget ) { 218 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; 219 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; 220 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; 221 default: break; 222 } 223 } 224 225 switch( textarget ) { 226 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; 227 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; 228 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; 229 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; 230 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; 231 default: 232 debug_assert( 0 ); 233 return TGSI_TEXTURE_1D; 234 } 235} 236 237 238static struct ureg_dst 239translate_dst( struct st_translate *t, 240 const struct prog_dst_register *DstReg, 241 boolean saturate ) 242{ 243 struct ureg_dst dst = dst_register( t, 244 DstReg->File, 245 DstReg->Index ); 246 247 dst = ureg_writemask( dst, 248 DstReg->WriteMask ); 249 250 if (saturate) 251 dst = ureg_saturate( dst ); 252 253 if (DstReg->RelAddr) 254 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 255 256 return dst; 257} 258 259 260static struct ureg_src 261translate_src( struct st_translate *t, 262 const struct prog_src_register *SrcReg ) 263{ 264 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 265 266 src = ureg_swizzle( src, 267 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 268 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 269 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 270 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 271 272 if (SrcReg->Negate == NEGATE_XYZW) 273 src = ureg_negate(src); 274 275 if (SrcReg->Abs) 276 src = ureg_abs(src); 277 278 if (SrcReg->RelAddr) { 279 src = ureg_src_indirect( src, ureg_src(t->address[0])); 280 /* If SrcReg->Index was negative, it was set to zero in 281 * src_register(). Reassign it now. 282 */ 283 src.Index = SrcReg->Index; 284 } 285 286 return src; 287} 288 289 290static struct ureg_src swizzle_4v( struct ureg_src src, 291 const unsigned *swz ) 292{ 293 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 294} 295 296 297/** 298 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: 299 * 300 * SWZ dst, src.x-y10 301 * 302 * becomes: 303 * 304 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 305 */ 306static void emit_swz( struct st_translate *t, 307 struct ureg_dst dst, 308 const struct prog_src_register *SrcReg ) 309{ 310 struct ureg_program *ureg = t->ureg; 311 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 312 313 unsigned negate_mask = SrcReg->Negate; 314 315 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 316 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 317 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 318 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 319 320 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 321 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 322 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 323 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 324 325 unsigned negative_one_mask = one_mask & negate_mask; 326 unsigned positive_one_mask = one_mask & ~negate_mask; 327 328 struct ureg_src imm; 329 unsigned i; 330 unsigned mul_swizzle[4] = {0,0,0,0}; 331 unsigned add_swizzle[4] = {0,0,0,0}; 332 unsigned src_swizzle[4] = {0,0,0,0}; 333 boolean need_add = FALSE; 334 boolean need_mul = FALSE; 335 336 if (dst.WriteMask == 0) 337 return; 338 339 /* Is this just a MOV? 340 */ 341 if (zero_mask == 0 && 342 one_mask == 0 && 343 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 344 { 345 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 346 return; 347 } 348 349#define IMM_ZERO 0 350#define IMM_ONE 1 351#define IMM_NEG_ONE 2 352 353 imm = ureg_imm3f( ureg, 0, 1, -1 ); 354 355 for (i = 0; i < 4; i++) { 356 unsigned bit = 1 << i; 357 358 if (dst.WriteMask & bit) { 359 if (positive_one_mask & bit) { 360 mul_swizzle[i] = IMM_ZERO; 361 add_swizzle[i] = IMM_ONE; 362 need_add = TRUE; 363 } 364 else if (negative_one_mask & bit) { 365 mul_swizzle[i] = IMM_ZERO; 366 add_swizzle[i] = IMM_NEG_ONE; 367 need_add = TRUE; 368 } 369 else if (zero_mask & bit) { 370 mul_swizzle[i] = IMM_ZERO; 371 add_swizzle[i] = IMM_ZERO; 372 need_add = TRUE; 373 } 374 else { 375 add_swizzle[i] = IMM_ZERO; 376 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 377 need_mul = TRUE; 378 if (negate_mask & bit) { 379 mul_swizzle[i] = IMM_NEG_ONE; 380 } 381 else { 382 mul_swizzle[i] = IMM_ONE; 383 } 384 } 385 } 386 } 387 388 if (need_mul && need_add) { 389 ureg_MAD( ureg, 390 dst, 391 swizzle_4v( src, src_swizzle ), 392 swizzle_4v( imm, mul_swizzle ), 393 swizzle_4v( imm, add_swizzle ) ); 394 } 395 else if (need_mul) { 396 ureg_MUL( ureg, 397 dst, 398 swizzle_4v( src, src_swizzle ), 399 swizzle_4v( imm, mul_swizzle ) ); 400 } 401 else if (need_add) { 402 ureg_MOV( ureg, 403 dst, 404 swizzle_4v( imm, add_swizzle ) ); 405 } 406 else { 407 debug_assert(0); 408 } 409 410#undef IMM_ZERO 411#undef IMM_ONE 412#undef IMM_NEG_ONE 413} 414 415 416/** 417 * Negate the value of DDY to match GL semantics where (0,0) is the 418 * lower-left corner of the window. 419 * Note that the GL_ARB_fragment_coord_conventions extension will 420 * effect this someday. 421 */ 422static void emit_ddy( struct st_translate *t, 423 struct ureg_dst dst, 424 const struct prog_src_register *SrcReg ) 425{ 426 struct ureg_program *ureg = t->ureg; 427 struct ureg_src src = translate_src( t, SrcReg ); 428 src = ureg_negate( src ); 429 ureg_DDY( ureg, dst, src ); 430} 431 432 433 434static unsigned 435translate_opcode( unsigned op ) 436{ 437 switch( op ) { 438 case OPCODE_ARL: 439 return TGSI_OPCODE_ARL; 440 case OPCODE_ABS: 441 return TGSI_OPCODE_ABS; 442 case OPCODE_ADD: 443 return TGSI_OPCODE_ADD; 444 case OPCODE_BGNLOOP: 445 return TGSI_OPCODE_BGNLOOP; 446 case OPCODE_BGNSUB: 447 return TGSI_OPCODE_BGNSUB; 448 case OPCODE_BRA: 449 return TGSI_OPCODE_BRA; 450 case OPCODE_BRK: 451 return TGSI_OPCODE_BRK; 452 case OPCODE_CAL: 453 return TGSI_OPCODE_CAL; 454 case OPCODE_CMP: 455 return TGSI_OPCODE_CMP; 456 case OPCODE_CONT: 457 return TGSI_OPCODE_CONT; 458 case OPCODE_COS: 459 return TGSI_OPCODE_COS; 460 case OPCODE_DDX: 461 return TGSI_OPCODE_DDX; 462 case OPCODE_DDY: 463 return TGSI_OPCODE_DDY; 464 case OPCODE_DP2: 465 return TGSI_OPCODE_DP2; 466 case OPCODE_DP2A: 467 return TGSI_OPCODE_DP2A; 468 case OPCODE_DP3: 469 return TGSI_OPCODE_DP3; 470 case OPCODE_DP4: 471 return TGSI_OPCODE_DP4; 472 case OPCODE_DPH: 473 return TGSI_OPCODE_DPH; 474 case OPCODE_DST: 475 return TGSI_OPCODE_DST; 476 case OPCODE_ELSE: 477 return TGSI_OPCODE_ELSE; 478 case OPCODE_ENDIF: 479 return TGSI_OPCODE_ENDIF; 480 case OPCODE_ENDLOOP: 481 return TGSI_OPCODE_ENDLOOP; 482 case OPCODE_ENDSUB: 483 return TGSI_OPCODE_ENDSUB; 484 case OPCODE_EX2: 485 return TGSI_OPCODE_EX2; 486 case OPCODE_EXP: 487 return TGSI_OPCODE_EXP; 488 case OPCODE_FLR: 489 return TGSI_OPCODE_FLR; 490 case OPCODE_FRC: 491 return TGSI_OPCODE_FRC; 492 case OPCODE_IF: 493 return TGSI_OPCODE_IF; 494 case OPCODE_TRUNC: 495 return TGSI_OPCODE_TRUNC; 496 case OPCODE_KIL: 497 return TGSI_OPCODE_KIL; 498 case OPCODE_KIL_NV: 499 return TGSI_OPCODE_KILP; 500 case OPCODE_LG2: 501 return TGSI_OPCODE_LG2; 502 case OPCODE_LOG: 503 return TGSI_OPCODE_LOG; 504 case OPCODE_LIT: 505 return TGSI_OPCODE_LIT; 506 case OPCODE_LRP: 507 return TGSI_OPCODE_LRP; 508 case OPCODE_MAD: 509 return TGSI_OPCODE_MAD; 510 case OPCODE_MAX: 511 return TGSI_OPCODE_MAX; 512 case OPCODE_MIN: 513 return TGSI_OPCODE_MIN; 514 case OPCODE_MOV: 515 return TGSI_OPCODE_MOV; 516 case OPCODE_MUL: 517 return TGSI_OPCODE_MUL; 518 case OPCODE_NOP: 519 return TGSI_OPCODE_NOP; 520 case OPCODE_NRM3: 521 return TGSI_OPCODE_NRM; 522 case OPCODE_NRM4: 523 return TGSI_OPCODE_NRM4; 524 case OPCODE_POW: 525 return TGSI_OPCODE_POW; 526 case OPCODE_RCP: 527 return TGSI_OPCODE_RCP; 528 case OPCODE_RET: 529 return TGSI_OPCODE_RET; 530 case OPCODE_RSQ: 531 return TGSI_OPCODE_RSQ; 532 case OPCODE_SCS: 533 return TGSI_OPCODE_SCS; 534 case OPCODE_SEQ: 535 return TGSI_OPCODE_SEQ; 536 case OPCODE_SGE: 537 return TGSI_OPCODE_SGE; 538 case OPCODE_SGT: 539 return TGSI_OPCODE_SGT; 540 case OPCODE_SIN: 541 return TGSI_OPCODE_SIN; 542 case OPCODE_SLE: 543 return TGSI_OPCODE_SLE; 544 case OPCODE_SLT: 545 return TGSI_OPCODE_SLT; 546 case OPCODE_SNE: 547 return TGSI_OPCODE_SNE; 548 case OPCODE_SSG: 549 return TGSI_OPCODE_SSG; 550 case OPCODE_SUB: 551 return TGSI_OPCODE_SUB; 552 case OPCODE_TEX: 553 return TGSI_OPCODE_TEX; 554 case OPCODE_TXB: 555 return TGSI_OPCODE_TXB; 556 case OPCODE_TXD: 557 return TGSI_OPCODE_TXD; 558 case OPCODE_TXL: 559 return TGSI_OPCODE_TXL; 560 case OPCODE_TXP: 561 return TGSI_OPCODE_TXP; 562 case OPCODE_XPD: 563 return TGSI_OPCODE_XPD; 564 case OPCODE_END: 565 return TGSI_OPCODE_END; 566 default: 567 debug_assert( 0 ); 568 return TGSI_OPCODE_NOP; 569 } 570} 571 572 573static void 574compile_instruction( 575 struct st_translate *t, 576 const struct prog_instruction *inst ) 577{ 578 struct ureg_program *ureg = t->ureg; 579 GLuint i; 580 struct ureg_dst dst[1]; 581 struct ureg_src src[4]; 582 unsigned num_dst; 583 unsigned num_src; 584 585 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 586 num_src = _mesa_num_inst_src_regs( inst->Opcode ); 587 588 if (num_dst) 589 dst[0] = translate_dst( t, 590 &inst->DstReg, 591 inst->SaturateMode ); 592 593 for (i = 0; i < num_src; i++) 594 src[i] = translate_src( t, &inst->SrcReg[i] ); 595 596 switch( inst->Opcode ) { 597 case OPCODE_SWZ: 598 emit_swz( t, dst[0], &inst->SrcReg[0] ); 599 return; 600 601 case OPCODE_BGNLOOP: 602 case OPCODE_CAL: 603 case OPCODE_ELSE: 604 case OPCODE_ENDLOOP: 605 case OPCODE_IF: 606 debug_assert(num_dst == 0); 607 ureg_label_insn( ureg, 608 translate_opcode( inst->Opcode ), 609 src, num_src, 610 get_label( t, inst->BranchTarget )); 611 return; 612 613 case OPCODE_TEX: 614 case OPCODE_TXB: 615 case OPCODE_TXD: 616 case OPCODE_TXL: 617 case OPCODE_TXP: 618 src[num_src++] = t->samplers[inst->TexSrcUnit]; 619 ureg_tex_insn( ureg, 620 translate_opcode( inst->Opcode ), 621 dst, num_dst, 622 translate_texture_target( inst->TexSrcTarget, 623 inst->TexShadow ), 624 src, num_src ); 625 return; 626 627 case OPCODE_SCS: 628 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 629 ureg_insn( ureg, 630 translate_opcode( inst->Opcode ), 631 dst, num_dst, 632 src, num_src ); 633 break; 634 635 case OPCODE_XPD: 636 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 637 ureg_insn( ureg, 638 translate_opcode( inst->Opcode ), 639 dst, num_dst, 640 src, num_src ); 641 break; 642 643 case OPCODE_NOISE1: 644 case OPCODE_NOISE2: 645 case OPCODE_NOISE3: 646 case OPCODE_NOISE4: 647 /* At some point, a motivated person could add a better 648 * implementation of noise. Currently not even the nvidia 649 * binary drivers do anything more than this. In any case, the 650 * place to do this is in the GL state tracker, not the poor 651 * driver. 652 */ 653 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 654 break; 655 656 case OPCODE_DDY: 657 emit_ddy( t, dst[0], &inst->SrcReg[0] ); 658 break; 659 660 default: 661 ureg_insn( ureg, 662 translate_opcode( inst->Opcode ), 663 dst, num_dst, 664 src, num_src ); 665 break; 666 } 667} 668 669/** 670 * Emit the TGSI instructions to adjust the WPOS pixel center convention 671 */ 672static void 673emit_adjusted_wpos( struct st_translate *t, 674 const struct gl_program *program, GLfloat value) 675{ 676 struct ureg_program *ureg = t->ureg; 677 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 678 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 679 680 ureg_ADD(ureg, ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y), 681 wpos_input, ureg_imm1f(ureg, value)); 682 683 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 684} 685 686/** 687 * Emit the TGSI instructions for inverting the WPOS y coordinate. 688 */ 689static void 690emit_inverted_wpos( struct st_translate *t, 691 const struct gl_program *program ) 692{ 693 struct ureg_program *ureg = t->ureg; 694 695 /* Fragment program uses fragment position input. 696 * Need to replace instances of INPUT[WPOS] with temp T 697 * where T = INPUT[WPOS] by y is inverted. 698 */ 699 static const gl_state_index winSizeState[STATE_LENGTH] 700 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; 701 702 /* XXX: note we are modifying the incoming shader here! Need to 703 * do this before emitting the constant decls below, or this 704 * will be missed: 705 */ 706 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, 707 winSizeState); 708 709 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); 710 struct ureg_dst wpos_temp; 711 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 712 713 /* MOV wpos_temp, input[wpos] 714 */ 715 if (wpos_input.File == TGSI_FILE_TEMPORARY) 716 wpos_temp = ureg_dst(wpos_input); 717 else { 718 wpos_temp = ureg_DECL_temporary( ureg ); 719 ureg_MOV( ureg, wpos_temp, wpos_input ); 720 } 721 722 /* SUB wpos_temp.y, winsize_const, wpos_input 723 */ 724 ureg_SUB( ureg, 725 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 726 winsize, 727 wpos_input); 728 729 /* Use wpos_temp as position input from here on: 730 */ 731 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 732} 733 734 735/** 736 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 737 * TGSI uses +1 for front, -1 for back. 738 * This function converts the TGSI value to the GL value. Simply clamping/ 739 * saturating the value to [0,1] does the job. 740 */ 741static void 742emit_face_var( struct st_translate *t, 743 const struct gl_program *program ) 744{ 745 struct ureg_program *ureg = t->ureg; 746 struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); 747 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 748 749 /* MOV_SAT face_temp, input[face] 750 */ 751 face_temp = ureg_saturate( face_temp ); 752 ureg_MOV( ureg, face_temp, face_input ); 753 754 /* Use face_temp as face input from here on: 755 */ 756 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 757} 758 759 760static void 761emit_edgeflags( struct st_translate *t, 762 const struct gl_program *program ) 763{ 764 struct ureg_program *ureg = t->ureg; 765 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 766 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 767 768 ureg_MOV( ureg, edge_dst, edge_src ); 769} 770 771 772/** 773 * Translate Mesa program to TGSI format. 774 * \param program the program to translate 775 * \param numInputs number of input registers used 776 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 777 * input indexes 778 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 779 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 780 * each input 781 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 782 * \param numOutputs number of output registers used 783 * \param outputMapping maps Mesa fragment program outputs to TGSI 784 * generic outputs 785 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 786 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 787 * each output 788 * 789 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 790 */ 791enum pipe_error 792st_translate_mesa_program( 793 GLcontext *ctx, 794 uint procType, 795 struct ureg_program *ureg, 796 const struct gl_program *program, 797 GLuint numInputs, 798 const GLuint inputMapping[], 799 const ubyte inputSemanticName[], 800 const ubyte inputSemanticIndex[], 801 const GLuint interpMode[], 802 GLuint numOutputs, 803 const GLuint outputMapping[], 804 const ubyte outputSemanticName[], 805 const ubyte outputSemanticIndex[], 806 boolean passthrough_edgeflags ) 807{ 808 struct st_translate translate, *t; 809 unsigned i; 810 enum pipe_error ret = PIPE_OK; 811 812 t = &translate; 813 memset(t, 0, sizeof *t); 814 815 t->procType = procType; 816 t->inputMapping = inputMapping; 817 t->outputMapping = outputMapping; 818 t->ureg = ureg; 819 820 /*_mesa_print_program(program);*/ 821 822 /* 823 * Declare input attributes. 824 */ 825 if (procType == TGSI_PROCESSOR_FRAGMENT) { 826 struct gl_fragment_program* fp = (struct gl_fragment_program*)program; 827 for (i = 0; i < numInputs; i++) { 828 t->inputs[i] = ureg_DECL_fs_input(ureg, 829 inputSemanticName[i], 830 inputSemanticIndex[i], 831 interpMode[i]); 832 } 833 834 if (program->InputsRead & FRAG_BIT_WPOS) { 835 /* Must do this after setting up t->inputs, and before 836 * emitting constant references, below: 837 */ 838 struct pipe_screen* pscreen = st_context(ctx)->pipe->screen; 839 boolean invert = FALSE; 840 841 if (fp->OriginUpperLeft) { 842 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 843 } 844 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 845 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 846 invert = TRUE; 847 } 848 else 849 assert(0); 850 } 851 else { 852 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 853 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 854 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 855 invert = TRUE; 856 else 857 assert(0); 858 } 859 860 if (fp->PixelCenterInteger) { 861 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 862 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 863 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 864 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f); 865 else 866 assert(0); 867 } 868 else { 869 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 870 } 871 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 872 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 873 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f); 874 } 875 else 876 assert(0); 877 } 878 879 /* we invert after adjustment so that we avoid the MOV to temporary, 880 * and reuse the adjustment ADD instead */ 881 if (invert) 882 emit_inverted_wpos(t, program); 883 } 884 885 if (program->InputsRead & FRAG_BIT_FACE) { 886 emit_face_var( t, program ); 887 } 888 889 /* 890 * Declare output attributes. 891 */ 892 for (i = 0; i < numOutputs; i++) { 893 switch (outputSemanticName[i]) { 894 case TGSI_SEMANTIC_POSITION: 895 t->outputs[i] = ureg_DECL_output( ureg, 896 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 897 outputSemanticIndex[i] ); 898 899 t->outputs[i] = ureg_writemask( t->outputs[i], 900 TGSI_WRITEMASK_Z ); 901 break; 902 case TGSI_SEMANTIC_COLOR: 903 t->outputs[i] = ureg_DECL_output( ureg, 904 TGSI_SEMANTIC_COLOR, 905 outputSemanticIndex[i] ); 906 break; 907 default: 908 debug_assert(0); 909 return 0; 910 } 911 } 912 } 913 else { 914 for (i = 0; i < numInputs; i++) { 915 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 916 } 917 918 for (i = 0; i < numOutputs; i++) { 919 t->outputs[i] = ureg_DECL_output( ureg, 920 outputSemanticName[i], 921 outputSemanticIndex[i] ); 922 } 923 if (passthrough_edgeflags) 924 emit_edgeflags( t, program ); 925 } 926 927 /* Declare address register. 928 */ 929 if (program->NumAddressRegs > 0) { 930 debug_assert( program->NumAddressRegs == 1 ); 931 t->address[0] = ureg_DECL_address( ureg ); 932 } 933 934 935 /* Emit constants and immediates. Mesa uses a single index space 936 * for these, so we put all the translated regs in t->constants. 937 */ 938 if (program->Parameters) { 939 940 t->constants = CALLOC( program->Parameters->NumParameters, 941 sizeof t->constants[0] ); 942 if (t->constants == NULL) { 943 ret = PIPE_ERROR_OUT_OF_MEMORY; 944 goto out; 945 } 946 947 for (i = 0; i < program->Parameters->NumParameters; i++) { 948 switch (program->Parameters->Parameters[i].Type) { 949 case PROGRAM_ENV_PARAM: 950 case PROGRAM_LOCAL_PARAM: 951 case PROGRAM_STATE_VAR: 952 case PROGRAM_NAMED_PARAM: 953 case PROGRAM_UNIFORM: 954 t->constants[i] = ureg_DECL_constant( ureg, i ); 955 break; 956 957 /* Emit immediates only when there is no address register 958 * in use. FIXME: Be smarter and recognize param arrays: 959 * indirect addressing is only valid within the referenced 960 * array. 961 */ 962 case PROGRAM_CONSTANT: 963 if (program->NumAddressRegs > 0) 964 t->constants[i] = ureg_DECL_constant( ureg, i ); 965 else 966 t->constants[i] = 967 ureg_DECL_immediate( ureg, 968 program->Parameters->ParameterValues[i], 969 4 ); 970 break; 971 default: 972 break; 973 } 974 } 975 } 976 977 /* texture samplers */ 978 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 979 if (program->SamplersUsed & (1 << i)) { 980 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 981 } 982 } 983 984 /* Emit each instruction in turn: 985 */ 986 for (i = 0; i < program->NumInstructions; i++) { 987 set_insn_start( t, ureg_get_instruction_number( ureg )); 988 compile_instruction( t, &program->Instructions[i] ); 989 } 990 991 /* Fix up all emitted labels: 992 */ 993 for (i = 0; i < t->labels_count; i++) { 994 ureg_fixup_label( ureg, 995 t->labels[i].token, 996 t->insn[t->labels[i].branch_target] ); 997 } 998 999out: 1000 FREE(t->insn); 1001 FREE(t->labels); 1002 FREE(t->constants); 1003 1004 if (t->error) { 1005 debug_printf("%s: translate error flag set\n", __FUNCTION__); 1006 } 1007 1008 return ret; 1009} 1010 1011 1012/** 1013 * Tokens cannot be free with _mesa_free otherwise the builtin gallium 1014 * malloc debugging will get confused. 1015 */ 1016void 1017st_free_tokens(const struct tgsi_token *tokens) 1018{ 1019 FREE((void *)tokens); 1020} 1021