/* st_mesa_to_tgsi.c revision 489eced800cb0f20a1e1d66b84a048e4df99503b */
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * \author 30 * Michal Krol, 31 * Keith Whitwell 32 */ 33 34#include "pipe/p_compiler.h" 35#include "pipe/p_shader_tokens.h" 36#include "pipe/p_state.h" 37#include "pipe/p_context.h" 38#include "tgsi/tgsi_ureg.h" 39#include "st_mesa_to_tgsi.h" 40#include "st_context.h" 41#include "shader/prog_instruction.h" 42#include "shader/prog_parameter.h" 43#include "util/u_debug.h" 44#include "util/u_math.h" 45#include "util/u_memory.h" 46 47struct label { 48 unsigned branch_target; 49 unsigned token; 50}; 51 52 53/** 54 * Intermediate state used during shader translation. 
55 */ 56struct st_translate { 57 struct ureg_program *ureg; 58 59 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 60 struct ureg_src *constants; 61 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 62 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 63 struct ureg_dst address[1]; 64 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 65 struct ureg_dst psizregreal; 66 struct ureg_src pointSizeConst; 67 GLint psizoutindex; 68 GLboolean prevInstWrotePsiz; 69 70 const GLuint *inputMapping; 71 const GLuint *outputMapping; 72 73 /* For every instruction that contains a label (eg CALL), keep 74 * details so that we can go back afterwards and emit the correct 75 * tgsi instruction number for each label. 76 */ 77 struct label *labels; 78 unsigned labels_size; 79 unsigned labels_count; 80 81 /* Keep a record of the tgsi instruction number that each mesa 82 * instruction starts at, will be used to fix up labels after 83 * translation. 84 */ 85 unsigned *insn; 86 unsigned insn_size; 87 unsigned insn_count; 88 89 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 90 91 boolean error; 92}; 93 94 95static unsigned *get_label( struct st_translate *t, 96 unsigned branch_target ) 97{ 98 unsigned i; 99 100 if (t->labels_count + 1 >= t->labels_size) { 101 unsigned old_size = t->labels_size; 102 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 103 t->labels = REALLOC( t->labels, 104 old_size * sizeof t->labels[0], 105 t->labels_size * sizeof t->labels[0] ); 106 if (t->labels == NULL) { 107 static unsigned dummy; 108 t->error = TRUE; 109 return &dummy; 110 } 111 } 112 113 i = t->labels_count++; 114 t->labels[i].branch_target = branch_target; 115 return &t->labels[i].token; 116} 117 118 119static void set_insn_start( struct st_translate *t, 120 unsigned start ) 121{ 122 if (t->insn_count + 1 >= t->insn_size) { 123 unsigned old_size = t->insn_size; 124 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 125 t->insn = REALLOC( t->insn, 126 old_size * sizeof t->insn[0], 127 
t->insn_size * sizeof t->insn[0] ); 128 if (t->insn == NULL) { 129 t->error = TRUE; 130 return; 131 } 132 } 133 134 t->insn[t->insn_count++] = start; 135} 136 137 138/* 139 * Map mesa register file to TGSI register file. 140 */ 141static struct ureg_dst 142dst_register( struct st_translate *t, 143 gl_register_file file, 144 GLuint index ) 145{ 146 switch( file ) { 147 case PROGRAM_UNDEFINED: 148 return ureg_dst_undef(); 149 150 case PROGRAM_TEMPORARY: 151 if (ureg_dst_is_undef(t->temps[index])) 152 t->temps[index] = ureg_DECL_temporary( t->ureg ); 153 154 return t->temps[index]; 155 156 case PROGRAM_OUTPUT: 157 if (index == t->psizoutindex) 158 t->prevInstWrotePsiz = GL_TRUE; 159 return t->outputs[t->outputMapping[index]]; 160 161 case PROGRAM_ADDRESS: 162 return t->address[index]; 163 164 default: 165 debug_assert( 0 ); 166 return ureg_dst_undef(); 167 } 168} 169 170 171static struct ureg_src 172src_register( struct st_translate *t, 173 gl_register_file file, 174 GLint index ) 175{ 176 switch( file ) { 177 case PROGRAM_UNDEFINED: 178 return ureg_src_undef(); 179 180 case PROGRAM_TEMPORARY: 181 ASSERT(index >= 0); 182 if (ureg_dst_is_undef(t->temps[index])) 183 t->temps[index] = ureg_DECL_temporary( t->ureg ); 184 return ureg_src(t->temps[index]); 185 186 case PROGRAM_NAMED_PARAM: 187 case PROGRAM_ENV_PARAM: 188 case PROGRAM_LOCAL_PARAM: 189 case PROGRAM_UNIFORM: 190 ASSERT(index >= 0); 191 return t->constants[index]; 192 case PROGRAM_STATE_VAR: 193 case PROGRAM_CONSTANT: /* ie, immediate */ 194 if (index < 0) 195 return ureg_DECL_constant( t->ureg, 0 ); 196 else 197 return t->constants[index]; 198 199 case PROGRAM_INPUT: 200 return t->inputs[t->inputMapping[index]]; 201 202 case PROGRAM_OUTPUT: 203 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? 
*/ 204 205 case PROGRAM_ADDRESS: 206 return ureg_src(t->address[index]); 207 208 default: 209 debug_assert( 0 ); 210 return ureg_src_undef(); 211 } 212} 213 214 215/** 216 * Map mesa texture target to TGSI texture target. 217 */ 218static unsigned 219translate_texture_target( GLuint textarget, 220 GLboolean shadow ) 221{ 222 if (shadow) { 223 switch( textarget ) { 224 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; 225 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; 226 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; 227 default: break; 228 } 229 } 230 231 switch( textarget ) { 232 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; 233 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; 234 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; 235 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; 236 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; 237 default: 238 debug_assert( 0 ); 239 return TGSI_TEXTURE_1D; 240 } 241} 242 243 244static struct ureg_dst 245translate_dst( struct st_translate *t, 246 const struct prog_dst_register *DstReg, 247 boolean saturate ) 248{ 249 struct ureg_dst dst = dst_register( t, 250 DstReg->File, 251 DstReg->Index ); 252 253 dst = ureg_writemask( dst, 254 DstReg->WriteMask ); 255 256 if (saturate) 257 dst = ureg_saturate( dst ); 258 259 if (DstReg->RelAddr) 260 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 261 262 return dst; 263} 264 265 266static struct ureg_src 267translate_src( struct st_translate *t, 268 const struct prog_src_register *SrcReg ) 269{ 270 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 271 272 src = ureg_swizzle( src, 273 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 274 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 275 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 276 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 277 278 if (SrcReg->Negate == NEGATE_XYZW) 279 src = ureg_negate(src); 280 281 if (SrcReg->Abs) 282 src = ureg_abs(src); 283 284 if (SrcReg->RelAddr) { 285 src = ureg_src_indirect( src, 
ureg_src(t->address[0])); 286 /* If SrcReg->Index was negative, it was set to zero in 287 * src_register(). Reassign it now. 288 */ 289 src.Index = SrcReg->Index; 290 } 291 292 return src; 293} 294 295 296static struct ureg_src swizzle_4v( struct ureg_src src, 297 const unsigned *swz ) 298{ 299 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 300} 301 302 303/** 304 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: 305 * 306 * SWZ dst, src.x-y10 307 * 308 * becomes: 309 * 310 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 311 */ 312static void emit_swz( struct st_translate *t, 313 struct ureg_dst dst, 314 const struct prog_src_register *SrcReg ) 315{ 316 struct ureg_program *ureg = t->ureg; 317 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 318 319 unsigned negate_mask = SrcReg->Negate; 320 321 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 322 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 323 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 324 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 325 326 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 327 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 328 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 329 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 330 331 unsigned negative_one_mask = one_mask & negate_mask; 332 unsigned positive_one_mask = one_mask & ~negate_mask; 333 334 struct ureg_src imm; 335 unsigned i; 336 unsigned mul_swizzle[4] = {0,0,0,0}; 337 unsigned add_swizzle[4] = {0,0,0,0}; 338 unsigned src_swizzle[4] = {0,0,0,0}; 339 boolean need_add = FALSE; 340 boolean need_mul = FALSE; 341 342 if (dst.WriteMask == 0) 343 return; 344 345 /* Is this just a MOV? 
346 */ 347 if (zero_mask == 0 && 348 one_mask == 0 && 349 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 350 { 351 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 352 return; 353 } 354 355#define IMM_ZERO 0 356#define IMM_ONE 1 357#define IMM_NEG_ONE 2 358 359 imm = ureg_imm3f( ureg, 0, 1, -1 ); 360 361 for (i = 0; i < 4; i++) { 362 unsigned bit = 1 << i; 363 364 if (dst.WriteMask & bit) { 365 if (positive_one_mask & bit) { 366 mul_swizzle[i] = IMM_ZERO; 367 add_swizzle[i] = IMM_ONE; 368 need_add = TRUE; 369 } 370 else if (negative_one_mask & bit) { 371 mul_swizzle[i] = IMM_ZERO; 372 add_swizzle[i] = IMM_NEG_ONE; 373 need_add = TRUE; 374 } 375 else if (zero_mask & bit) { 376 mul_swizzle[i] = IMM_ZERO; 377 add_swizzle[i] = IMM_ZERO; 378 need_add = TRUE; 379 } 380 else { 381 add_swizzle[i] = IMM_ZERO; 382 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 383 need_mul = TRUE; 384 if (negate_mask & bit) { 385 mul_swizzle[i] = IMM_NEG_ONE; 386 } 387 else { 388 mul_swizzle[i] = IMM_ONE; 389 } 390 } 391 } 392 } 393 394 if (need_mul && need_add) { 395 ureg_MAD( ureg, 396 dst, 397 swizzle_4v( src, src_swizzle ), 398 swizzle_4v( imm, mul_swizzle ), 399 swizzle_4v( imm, add_swizzle ) ); 400 } 401 else if (need_mul) { 402 ureg_MUL( ureg, 403 dst, 404 swizzle_4v( src, src_swizzle ), 405 swizzle_4v( imm, mul_swizzle ) ); 406 } 407 else if (need_add) { 408 ureg_MOV( ureg, 409 dst, 410 swizzle_4v( imm, add_swizzle ) ); 411 } 412 else { 413 debug_assert(0); 414 } 415 416#undef IMM_ZERO 417#undef IMM_ONE 418#undef IMM_NEG_ONE 419} 420 421 422/** 423 * Negate the value of DDY to match GL semantics where (0,0) is the 424 * lower-left corner of the window. 425 * Note that the GL_ARB_fragment_coord_conventions extension will 426 * effect this someday. 
427 */ 428static void emit_ddy( struct st_translate *t, 429 struct ureg_dst dst, 430 const struct prog_src_register *SrcReg ) 431{ 432 struct ureg_program *ureg = t->ureg; 433 struct ureg_src src = translate_src( t, SrcReg ); 434 src = ureg_negate( src ); 435 ureg_DDY( ureg, dst, src ); 436} 437 438 439 440static unsigned 441translate_opcode( unsigned op ) 442{ 443 switch( op ) { 444 case OPCODE_ARL: 445 return TGSI_OPCODE_ARL; 446 case OPCODE_ABS: 447 return TGSI_OPCODE_ABS; 448 case OPCODE_ADD: 449 return TGSI_OPCODE_ADD; 450 case OPCODE_BGNLOOP: 451 return TGSI_OPCODE_BGNLOOP; 452 case OPCODE_BGNSUB: 453 return TGSI_OPCODE_BGNSUB; 454 case OPCODE_BRA: 455 return TGSI_OPCODE_BRA; 456 case OPCODE_BRK: 457 return TGSI_OPCODE_BRK; 458 case OPCODE_CAL: 459 return TGSI_OPCODE_CAL; 460 case OPCODE_CMP: 461 return TGSI_OPCODE_CMP; 462 case OPCODE_CONT: 463 return TGSI_OPCODE_CONT; 464 case OPCODE_COS: 465 return TGSI_OPCODE_COS; 466 case OPCODE_DDX: 467 return TGSI_OPCODE_DDX; 468 case OPCODE_DDY: 469 return TGSI_OPCODE_DDY; 470 case OPCODE_DP2: 471 return TGSI_OPCODE_DP2; 472 case OPCODE_DP2A: 473 return TGSI_OPCODE_DP2A; 474 case OPCODE_DP3: 475 return TGSI_OPCODE_DP3; 476 case OPCODE_DP4: 477 return TGSI_OPCODE_DP4; 478 case OPCODE_DPH: 479 return TGSI_OPCODE_DPH; 480 case OPCODE_DST: 481 return TGSI_OPCODE_DST; 482 case OPCODE_ELSE: 483 return TGSI_OPCODE_ELSE; 484 case OPCODE_ENDIF: 485 return TGSI_OPCODE_ENDIF; 486 case OPCODE_ENDLOOP: 487 return TGSI_OPCODE_ENDLOOP; 488 case OPCODE_ENDSUB: 489 return TGSI_OPCODE_ENDSUB; 490 case OPCODE_EX2: 491 return TGSI_OPCODE_EX2; 492 case OPCODE_EXP: 493 return TGSI_OPCODE_EXP; 494 case OPCODE_FLR: 495 return TGSI_OPCODE_FLR; 496 case OPCODE_FRC: 497 return TGSI_OPCODE_FRC; 498 case OPCODE_IF: 499 return TGSI_OPCODE_IF; 500 case OPCODE_TRUNC: 501 return TGSI_OPCODE_TRUNC; 502 case OPCODE_KIL: 503 return TGSI_OPCODE_KIL; 504 case OPCODE_KIL_NV: 505 return TGSI_OPCODE_KILP; 506 case OPCODE_LG2: 507 return TGSI_OPCODE_LG2; 508 
case OPCODE_LOG: 509 return TGSI_OPCODE_LOG; 510 case OPCODE_LIT: 511 return TGSI_OPCODE_LIT; 512 case OPCODE_LRP: 513 return TGSI_OPCODE_LRP; 514 case OPCODE_MAD: 515 return TGSI_OPCODE_MAD; 516 case OPCODE_MAX: 517 return TGSI_OPCODE_MAX; 518 case OPCODE_MIN: 519 return TGSI_OPCODE_MIN; 520 case OPCODE_MOV: 521 return TGSI_OPCODE_MOV; 522 case OPCODE_MUL: 523 return TGSI_OPCODE_MUL; 524 case OPCODE_NOP: 525 return TGSI_OPCODE_NOP; 526 case OPCODE_NRM3: 527 return TGSI_OPCODE_NRM; 528 case OPCODE_NRM4: 529 return TGSI_OPCODE_NRM4; 530 case OPCODE_POW: 531 return TGSI_OPCODE_POW; 532 case OPCODE_RCP: 533 return TGSI_OPCODE_RCP; 534 case OPCODE_RET: 535 return TGSI_OPCODE_RET; 536 case OPCODE_RSQ: 537 return TGSI_OPCODE_RSQ; 538 case OPCODE_SCS: 539 return TGSI_OPCODE_SCS; 540 case OPCODE_SEQ: 541 return TGSI_OPCODE_SEQ; 542 case OPCODE_SGE: 543 return TGSI_OPCODE_SGE; 544 case OPCODE_SGT: 545 return TGSI_OPCODE_SGT; 546 case OPCODE_SIN: 547 return TGSI_OPCODE_SIN; 548 case OPCODE_SLE: 549 return TGSI_OPCODE_SLE; 550 case OPCODE_SLT: 551 return TGSI_OPCODE_SLT; 552 case OPCODE_SNE: 553 return TGSI_OPCODE_SNE; 554 case OPCODE_SSG: 555 return TGSI_OPCODE_SSG; 556 case OPCODE_SUB: 557 return TGSI_OPCODE_SUB; 558 case OPCODE_TEX: 559 return TGSI_OPCODE_TEX; 560 case OPCODE_TXB: 561 return TGSI_OPCODE_TXB; 562 case OPCODE_TXD: 563 return TGSI_OPCODE_TXD; 564 case OPCODE_TXL: 565 return TGSI_OPCODE_TXL; 566 case OPCODE_TXP: 567 return TGSI_OPCODE_TXP; 568 case OPCODE_XPD: 569 return TGSI_OPCODE_XPD; 570 case OPCODE_END: 571 return TGSI_OPCODE_END; 572 default: 573 debug_assert( 0 ); 574 return TGSI_OPCODE_NOP; 575 } 576} 577 578 579static void 580compile_instruction( 581 struct st_translate *t, 582 const struct prog_instruction *inst ) 583{ 584 struct ureg_program *ureg = t->ureg; 585 GLuint i; 586 struct ureg_dst dst[1]; 587 struct ureg_src src[4]; 588 unsigned num_dst; 589 unsigned num_src; 590 591 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 592 num_src = 
_mesa_num_inst_src_regs( inst->Opcode ); 593 594 if (num_dst) 595 dst[0] = translate_dst( t, 596 &inst->DstReg, 597 inst->SaturateMode ); 598 599 for (i = 0; i < num_src; i++) 600 src[i] = translate_src( t, &inst->SrcReg[i] ); 601 602 switch( inst->Opcode ) { 603 case OPCODE_SWZ: 604 emit_swz( t, dst[0], &inst->SrcReg[0] ); 605 return; 606 607 case OPCODE_BGNLOOP: 608 case OPCODE_CAL: 609 case OPCODE_ELSE: 610 case OPCODE_ENDLOOP: 611 case OPCODE_IF: 612 debug_assert(num_dst == 0); 613 ureg_label_insn( ureg, 614 translate_opcode( inst->Opcode ), 615 src, num_src, 616 get_label( t, inst->BranchTarget )); 617 return; 618 619 case OPCODE_TEX: 620 case OPCODE_TXB: 621 case OPCODE_TXD: 622 case OPCODE_TXL: 623 case OPCODE_TXP: 624 src[num_src++] = t->samplers[inst->TexSrcUnit]; 625 ureg_tex_insn( ureg, 626 translate_opcode( inst->Opcode ), 627 dst, num_dst, 628 translate_texture_target( inst->TexSrcTarget, 629 inst->TexShadow ), 630 src, num_src ); 631 return; 632 633 case OPCODE_SCS: 634 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 635 ureg_insn( ureg, 636 translate_opcode( inst->Opcode ), 637 dst, num_dst, 638 src, num_src ); 639 break; 640 641 case OPCODE_XPD: 642 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 643 ureg_insn( ureg, 644 translate_opcode( inst->Opcode ), 645 dst, num_dst, 646 src, num_src ); 647 break; 648 649 case OPCODE_NOISE1: 650 case OPCODE_NOISE2: 651 case OPCODE_NOISE3: 652 case OPCODE_NOISE4: 653 /* At some point, a motivated person could add a better 654 * implementation of noise. Currently not even the nvidia 655 * binary drivers do anything more than this. In any case, the 656 * place to do this is in the GL state tracker, not the poor 657 * driver. 
658 */ 659 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 660 break; 661 662 case OPCODE_DDY: 663 emit_ddy( t, dst[0], &inst->SrcReg[0] ); 664 break; 665 666 default: 667 ureg_insn( ureg, 668 translate_opcode( inst->Opcode ), 669 dst, num_dst, 670 src, num_src ); 671 break; 672 } 673} 674 675/** 676 * Emit the TGSI instructions to adjust the WPOS pixel center convention 677 */ 678static void 679emit_adjusted_wpos( struct st_translate *t, 680 const struct gl_program *program, GLfloat value) 681{ 682 struct ureg_program *ureg = t->ureg; 683 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 684 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 685 686 ureg_ADD(ureg, ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y), 687 wpos_input, ureg_imm1f(ureg, value)); 688 689 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 690} 691 692/** 693 * Emit the TGSI instructions for inverting the WPOS y coordinate. 694 */ 695static void 696emit_inverted_wpos( struct st_translate *t, 697 const struct gl_program *program ) 698{ 699 struct ureg_program *ureg = t->ureg; 700 701 /* Fragment program uses fragment position input. 702 * Need to replace instances of INPUT[WPOS] with temp T 703 * where T = INPUT[WPOS] by y is inverted. 704 */ 705 static const gl_state_index winSizeState[STATE_LENGTH] 706 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; 707 708 /* XXX: note we are modifying the incoming shader here! 
Need to 709 * do this before emitting the constant decls below, or this 710 * will be missed: 711 */ 712 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, 713 winSizeState); 714 715 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); 716 struct ureg_dst wpos_temp; 717 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 718 719 /* MOV wpos_temp, input[wpos] 720 */ 721 if (wpos_input.File == TGSI_FILE_TEMPORARY) 722 wpos_temp = ureg_dst(wpos_input); 723 else { 724 wpos_temp = ureg_DECL_temporary( ureg ); 725 ureg_MOV( ureg, wpos_temp, wpos_input ); 726 } 727 728 /* SUB wpos_temp.y, winsize_const, wpos_input 729 */ 730 ureg_SUB( ureg, 731 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 732 winsize, 733 wpos_input); 734 735 /* Use wpos_temp as position input from here on: 736 */ 737 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 738} 739 740 741/** 742 * Emit fragment position/ooordinate code. 743 */ 744static void 745emit_wpos(struct st_context *st, 746 struct st_translate *t, 747 const struct gl_program *program, 748 struct ureg_program *ureg) 749{ 750 const struct gl_fragment_program *fp = 751 (const struct gl_fragment_program *) program; 752 struct pipe_screen *pscreen = st->pipe->screen; 753 boolean invert = FALSE; 754 755 if (fp->OriginUpperLeft) { 756 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 757 } 758 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 759 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 760 invert = TRUE; 761 } 762 else 763 assert(0); 764 } 765 else { 766 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 767 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 768 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 769 invert = TRUE; 770 else 771 assert(0); 772 } 773 774 if (fp->PixelCenterInteger) { 775 if 
(pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 776 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 777 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 778 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f); 779 else 780 assert(0); 781 } 782 else { 783 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 784 } 785 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 786 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 787 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f); 788 } 789 else 790 assert(0); 791 } 792 793 /* we invert after adjustment so that we avoid the MOV to temporary, 794 * and reuse the adjustment ADD instead */ 795 if (invert) 796 emit_inverted_wpos(t, program); 797} 798 799 800/** 801 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 802 * TGSI uses +1 for front, -1 for back. 803 * This function converts the TGSI value to the GL value. Simply clamping/ 804 * saturating the value to [0,1] does the job. 
805 */ 806static void 807emit_face_var( struct st_translate *t, 808 const struct gl_program *program ) 809{ 810 struct ureg_program *ureg = t->ureg; 811 struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); 812 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 813 814 /* MOV_SAT face_temp, input[face] 815 */ 816 face_temp = ureg_saturate( face_temp ); 817 ureg_MOV( ureg, face_temp, face_input ); 818 819 /* Use face_temp as face input from here on: 820 */ 821 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 822} 823 824 825static void 826emit_edgeflags( struct st_translate *t, 827 const struct gl_program *program ) 828{ 829 struct ureg_program *ureg = t->ureg; 830 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 831 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 832 833 ureg_MOV( ureg, edge_dst, edge_src ); 834} 835 836 837/** 838 * Translate Mesa program to TGSI format. 839 * \param program the program to translate 840 * \param numInputs number of input registers used 841 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 842 * input indexes 843 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 844 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 845 * each input 846 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 847 * \param numOutputs number of output registers used 848 * \param outputMapping maps Mesa fragment program outputs to TGSI 849 * generic outputs 850 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 851 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 852 * each output 853 * 854 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 855 */ 856enum pipe_error 857st_translate_mesa_program( 858 GLcontext *ctx, 859 uint procType, 860 struct ureg_program *ureg, 861 const struct gl_program *program, 862 GLuint numInputs, 863 const GLuint 
inputMapping[], 864 const ubyte inputSemanticName[], 865 const ubyte inputSemanticIndex[], 866 const GLuint interpMode[], 867 GLuint numOutputs, 868 const GLuint outputMapping[], 869 const ubyte outputSemanticName[], 870 const ubyte outputSemanticIndex[], 871 boolean passthrough_edgeflags ) 872{ 873 struct st_translate translate, *t; 874 unsigned i; 875 enum pipe_error ret = PIPE_OK; 876 877 t = &translate; 878 memset(t, 0, sizeof *t); 879 880 t->procType = procType; 881 t->inputMapping = inputMapping; 882 t->outputMapping = outputMapping; 883 t->ureg = ureg; 884 t->psizoutindex = -1; 885 t->prevInstWrotePsiz = GL_FALSE; 886 887 /*_mesa_print_program(program);*/ 888 889 /* 890 * Declare input attributes. 891 */ 892 if (procType == TGSI_PROCESSOR_FRAGMENT) { 893 for (i = 0; i < numInputs; i++) { 894 if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) { 895 t->inputs[i] = ureg_DECL_fs_input_cyl(ureg, 896 inputSemanticName[i], 897 inputSemanticIndex[i], 898 interpMode[i], 899 TGSI_CYLINDRICAL_WRAP_X); 900 } 901 else { 902 t->inputs[i] = ureg_DECL_fs_input(ureg, 903 inputSemanticName[i], 904 inputSemanticIndex[i], 905 interpMode[i]); 906 } 907 } 908 909 if (program->InputsRead & FRAG_BIT_WPOS) { 910 /* Must do this after setting up t->inputs, and before 911 * emitting constant references, below: 912 */ 913 emit_wpos(st_context(ctx), t, program, ureg); 914 } 915 916 if (program->InputsRead & FRAG_BIT_FACE) { 917 emit_face_var( t, program ); 918 } 919 920 /* 921 * Declare output attributes. 
922 */ 923 for (i = 0; i < numOutputs; i++) { 924 switch (outputSemanticName[i]) { 925 case TGSI_SEMANTIC_POSITION: 926 t->outputs[i] = ureg_DECL_output( ureg, 927 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 928 outputSemanticIndex[i] ); 929 930 t->outputs[i] = ureg_writemask( t->outputs[i], 931 TGSI_WRITEMASK_Z ); 932 break; 933 case TGSI_SEMANTIC_COLOR: 934 t->outputs[i] = ureg_DECL_output( ureg, 935 TGSI_SEMANTIC_COLOR, 936 outputSemanticIndex[i] ); 937 break; 938 default: 939 debug_assert(0); 940 return 0; 941 } 942 } 943 } 944 else { 945 for (i = 0; i < numInputs; i++) { 946 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 947 } 948 949 for (i = 0; i < numOutputs; i++) { 950 t->outputs[i] = ureg_DECL_output( ureg, 951 outputSemanticName[i], 952 outputSemanticIndex[i] ); 953 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) { 954 static const gl_state_index pointSizeClampState[STATE_LENGTH] 955 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 }; 956 /* XXX: note we are modifying the incoming shader here! Need to 957 * do this before emitting the constant decls below, or this 958 * will be missed: 959 */ 960 unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters, 961 pointSizeClampState); 962 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 963 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 964 t->psizregreal = t->outputs[i]; 965 t->psizoutindex = i; 966 t->outputs[i] = psizregtemp; 967 } 968 } 969 if (passthrough_edgeflags) 970 emit_edgeflags( t, program ); 971 } 972 973 /* Declare address register. 974 */ 975 if (program->NumAddressRegs > 0) { 976 debug_assert( program->NumAddressRegs == 1 ); 977 t->address[0] = ureg_DECL_address( ureg ); 978 } 979 980 981 /* Emit constants and immediates. Mesa uses a single index space 982 * for these, so we put all the translated regs in t->constants. 
983 */ 984 if (program->Parameters) { 985 986 t->constants = CALLOC( program->Parameters->NumParameters, 987 sizeof t->constants[0] ); 988 if (t->constants == NULL) { 989 ret = PIPE_ERROR_OUT_OF_MEMORY; 990 goto out; 991 } 992 993 for (i = 0; i < program->Parameters->NumParameters; i++) { 994 switch (program->Parameters->Parameters[i].Type) { 995 case PROGRAM_ENV_PARAM: 996 case PROGRAM_LOCAL_PARAM: 997 case PROGRAM_STATE_VAR: 998 case PROGRAM_NAMED_PARAM: 999 case PROGRAM_UNIFORM: 1000 t->constants[i] = ureg_DECL_constant( ureg, i ); 1001 break; 1002 1003 /* Emit immediates only when there is no address register 1004 * in use. FIXME: Be smarter and recognize param arrays: 1005 * indirect addressing is only valid within the referenced 1006 * array. 1007 */ 1008 case PROGRAM_CONSTANT: 1009 if (program->NumAddressRegs > 0) 1010 t->constants[i] = ureg_DECL_constant( ureg, i ); 1011 else 1012 t->constants[i] = 1013 ureg_DECL_immediate( ureg, 1014 program->Parameters->ParameterValues[i], 1015 4 ); 1016 break; 1017 default: 1018 break; 1019 } 1020 } 1021 } 1022 1023 /* texture samplers */ 1024 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 1025 if (program->SamplersUsed & (1 << i)) { 1026 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 1027 } 1028 } 1029 1030 /* Emit each instruction in turn: 1031 */ 1032 for (i = 0; i < program->NumInstructions; i++) { 1033 set_insn_start( t, ureg_get_instruction_number( ureg )); 1034 compile_instruction( t, &program->Instructions[i] ); 1035 1036 /* note can't do that easily at the end of prog due to 1037 possible early return */ 1038 if (t->prevInstWrotePsiz && program->Id) { 1039 set_insn_start( t, ureg_get_instruction_number( ureg )); 1040 ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X), 1041 ureg_src(t->outputs[t->psizoutindex]), 1042 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 1043 ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X), 1044 ureg_src(t->outputs[t->psizoutindex]), 1045 
ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 1046 } 1047 t->prevInstWrotePsiz = GL_FALSE; 1048 } 1049 1050 /* Fix up all emitted labels: 1051 */ 1052 for (i = 0; i < t->labels_count; i++) { 1053 ureg_fixup_label( ureg, 1054 t->labels[i].token, 1055 t->insn[t->labels[i].branch_target] ); 1056 } 1057 1058out: 1059 FREE(t->insn); 1060 FREE(t->labels); 1061 FREE(t->constants); 1062 1063 if (t->error) { 1064 debug_printf("%s: translate error flag set\n", __FUNCTION__); 1065 } 1066 1067 return ret; 1068} 1069 1070 1071/** 1072 * Tokens cannot be free with free otherwise the builtin gallium 1073 * malloc debugging will get confused. 1074 */ 1075void 1076st_free_tokens(const struct tgsi_token *tokens) 1077{ 1078 FREE((void *)tokens); 1079} 1080