st_mesa_to_tgsi.c revision 2c326e72664e65166c68b027b26aaf373f3be36d
/**************************************************************************
 * 
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * 26 **************************************************************************/ 27 28/* 29 * \author 30 * Michal Krol, 31 * Keith Whitwell 32 */ 33 34#include "pipe/p_compiler.h" 35#include "pipe/p_shader_tokens.h" 36#include "pipe/p_state.h" 37#include "tgsi/tgsi_ureg.h" 38#include "st_mesa_to_tgsi.h" 39#include "shader/prog_instruction.h" 40#include "shader/prog_parameter.h" 41#include "shader/prog_print.h" 42#include "util/u_debug.h" 43#include "util/u_math.h" 44#include "util/u_memory.h" 45 46struct label { 47 unsigned branch_target; 48 unsigned token; 49}; 50 51struct st_translate { 52 struct ureg_program *ureg; 53 54 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 55 struct ureg_src *constants; 56 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 57 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 58 struct ureg_dst address[1]; 59 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 60 struct ureg_dst psizregreal; 61 struct ureg_src pointSizeConst; 62 GLint psizoutindex; 63 GLboolean prevInstWrotePsiz; 64 65 const GLuint *inputMapping; 66 const GLuint *outputMapping; 67 68 /* For every instruction that contains a label (eg CALL), keep 69 * details so that we can go back afterwards and emit the correct 70 * tgsi instruction number for each label. 71 */ 72 struct label *labels; 73 unsigned labels_size; 74 unsigned labels_count; 75 76 /* Keep a record of the tgsi instruction number that each mesa 77 * instruction starts at, will be used to fix up labels after 78 * translation. 
79 */ 80 unsigned *insn; 81 unsigned insn_size; 82 unsigned insn_count; 83 84 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 85 86 boolean error; 87}; 88 89 90static unsigned *get_label( struct st_translate *t, 91 unsigned branch_target ) 92{ 93 unsigned i; 94 95 if (t->labels_count + 1 >= t->labels_size) { 96 unsigned old_size = t->labels_size; 97 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 98 t->labels = REALLOC( t->labels, 99 old_size * sizeof t->labels[0], 100 t->labels_size * sizeof t->labels[0] ); 101 if (t->labels == NULL) { 102 static unsigned dummy; 103 t->error = TRUE; 104 return &dummy; 105 } 106 } 107 108 i = t->labels_count++; 109 t->labels[i].branch_target = branch_target; 110 return &t->labels[i].token; 111} 112 113 114static void set_insn_start( struct st_translate *t, 115 unsigned start ) 116{ 117 if (t->insn_count + 1 >= t->insn_size) { 118 unsigned old_size = t->insn_size; 119 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 120 t->insn = REALLOC( t->insn, 121 old_size * sizeof t->insn[0], 122 t->insn_size * sizeof t->insn[0] ); 123 if (t->insn == NULL) { 124 t->error = TRUE; 125 return; 126 } 127 } 128 129 t->insn[t->insn_count++] = start; 130} 131 132 133/* 134 * Map mesa register file to TGSI register file. 
135 */ 136static struct ureg_dst 137dst_register( struct st_translate *t, 138 gl_register_file file, 139 GLuint index ) 140{ 141 switch( file ) { 142 case PROGRAM_UNDEFINED: 143 return ureg_dst_undef(); 144 145 case PROGRAM_TEMPORARY: 146 if (ureg_dst_is_undef(t->temps[index])) 147 t->temps[index] = ureg_DECL_temporary( t->ureg ); 148 149 return t->temps[index]; 150 151 case PROGRAM_OUTPUT: 152 if (index == t->psizoutindex) 153 t->prevInstWrotePsiz = GL_TRUE; 154 return t->outputs[t->outputMapping[index]]; 155 156 case PROGRAM_ADDRESS: 157 return t->address[index]; 158 159 default: 160 debug_assert( 0 ); 161 return ureg_dst_undef(); 162 } 163} 164 165 166static struct ureg_src 167src_register( struct st_translate *t, 168 gl_register_file file, 169 GLint index ) 170{ 171 switch( file ) { 172 case PROGRAM_UNDEFINED: 173 return ureg_src_undef(); 174 175 case PROGRAM_TEMPORARY: 176 ASSERT(index >= 0); 177 if (ureg_dst_is_undef(t->temps[index])) 178 t->temps[index] = ureg_DECL_temporary( t->ureg ); 179 return ureg_src(t->temps[index]); 180 181 case PROGRAM_STATE_VAR: 182 case PROGRAM_NAMED_PARAM: 183 case PROGRAM_ENV_PARAM: 184 case PROGRAM_LOCAL_PARAM: 185 case PROGRAM_UNIFORM: 186 ASSERT(index >= 0); 187 return t->constants[index]; 188 case PROGRAM_CONSTANT: /* ie, immediate */ 189 if (index < 0) 190 return ureg_DECL_constant( t->ureg, 0 ); 191 else 192 return t->constants[index]; 193 194 case PROGRAM_INPUT: 195 return t->inputs[t->inputMapping[index]]; 196 197 case PROGRAM_OUTPUT: 198 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 199 200 case PROGRAM_ADDRESS: 201 return ureg_src(t->address[index]); 202 203 default: 204 debug_assert( 0 ); 205 return ureg_src_undef(); 206 } 207} 208 209 210/** 211 * Map mesa texture target to TGSI texture target. 
212 */ 213static unsigned 214translate_texture_target( GLuint textarget, 215 GLboolean shadow ) 216{ 217 if (shadow) { 218 switch( textarget ) { 219 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; 220 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; 221 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; 222 default: break; 223 } 224 } 225 226 switch( textarget ) { 227 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; 228 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; 229 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; 230 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; 231 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; 232 default: 233 debug_assert( 0 ); 234 return TGSI_TEXTURE_1D; 235 } 236} 237 238 239static struct ureg_dst 240translate_dst( struct st_translate *t, 241 const struct prog_dst_register *DstReg, 242 boolean saturate ) 243{ 244 struct ureg_dst dst = dst_register( t, 245 DstReg->File, 246 DstReg->Index ); 247 248 dst = ureg_writemask( dst, 249 DstReg->WriteMask ); 250 251 if (saturate) 252 dst = ureg_saturate( dst ); 253 254 if (DstReg->RelAddr) 255 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 256 257 return dst; 258} 259 260 261static struct ureg_src 262translate_src( struct st_translate *t, 263 const struct prog_src_register *SrcReg ) 264{ 265 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 266 267 src = ureg_swizzle( src, 268 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 269 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 270 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 271 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 272 273 if (SrcReg->Negate == NEGATE_XYZW) 274 src = ureg_negate(src); 275 276 if (SrcReg->Abs) 277 src = ureg_abs(src); 278 279 if (SrcReg->RelAddr) { 280 src = ureg_src_indirect( src, ureg_src(t->address[0])); 281 /* If SrcReg->Index was negative, it was set to zero in 282 * src_register(). Reassign it now. 
283 */ 284 src.Index = SrcReg->Index; 285 } 286 287 return src; 288} 289 290 291static struct ureg_src swizzle_4v( struct ureg_src src, 292 const unsigned *swz ) 293{ 294 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 295} 296 297 298/** 299 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: 300 * 301 * SWZ dst, src.x-y10 302 * 303 * becomes: 304 * 305 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 306 */ 307static void emit_swz( struct st_translate *t, 308 struct ureg_dst dst, 309 const struct prog_src_register *SrcReg ) 310{ 311 struct ureg_program *ureg = t->ureg; 312 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 313 314 unsigned negate_mask = SrcReg->Negate; 315 316 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 317 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 318 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 319 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 320 321 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 322 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 323 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 324 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 325 326 unsigned negative_one_mask = one_mask & negate_mask; 327 unsigned positive_one_mask = one_mask & ~negate_mask; 328 329 struct ureg_src imm; 330 unsigned i; 331 unsigned mul_swizzle[4] = {0,0,0,0}; 332 unsigned add_swizzle[4] = {0,0,0,0}; 333 unsigned src_swizzle[4] = {0,0,0,0}; 334 boolean need_add = FALSE; 335 boolean need_mul = FALSE; 336 337 if (dst.WriteMask == 0) 338 return; 339 340 /* Is this just a MOV? 
341 */ 342 if (zero_mask == 0 && 343 one_mask == 0 && 344 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 345 { 346 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 347 return; 348 } 349 350#define IMM_ZERO 0 351#define IMM_ONE 1 352#define IMM_NEG_ONE 2 353 354 imm = ureg_imm3f( ureg, 0, 1, -1 ); 355 356 for (i = 0; i < 4; i++) { 357 unsigned bit = 1 << i; 358 359 if (dst.WriteMask & bit) { 360 if (positive_one_mask & bit) { 361 mul_swizzle[i] = IMM_ZERO; 362 add_swizzle[i] = IMM_ONE; 363 need_add = TRUE; 364 } 365 else if (negative_one_mask & bit) { 366 mul_swizzle[i] = IMM_ZERO; 367 add_swizzle[i] = IMM_NEG_ONE; 368 need_add = TRUE; 369 } 370 else if (zero_mask & bit) { 371 mul_swizzle[i] = IMM_ZERO; 372 add_swizzle[i] = IMM_ZERO; 373 need_add = TRUE; 374 } 375 else { 376 add_swizzle[i] = IMM_ZERO; 377 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 378 need_mul = TRUE; 379 if (negate_mask & bit) { 380 mul_swizzle[i] = IMM_NEG_ONE; 381 } 382 else { 383 mul_swizzle[i] = IMM_ONE; 384 } 385 } 386 } 387 } 388 389 if (need_mul && need_add) { 390 ureg_MAD( ureg, 391 dst, 392 swizzle_4v( src, src_swizzle ), 393 swizzle_4v( imm, mul_swizzle ), 394 swizzle_4v( imm, add_swizzle ) ); 395 } 396 else if (need_mul) { 397 ureg_MUL( ureg, 398 dst, 399 swizzle_4v( src, src_swizzle ), 400 swizzle_4v( imm, mul_swizzle ) ); 401 } 402 else if (need_add) { 403 ureg_MOV( ureg, 404 dst, 405 swizzle_4v( imm, add_swizzle ) ); 406 } 407 else { 408 debug_assert(0); 409 } 410 411#undef IMM_ZERO 412#undef IMM_ONE 413#undef IMM_NEG_ONE 414} 415 416 417/** 418 * Negate the value of DDY to match GL semantics where (0,0) is the 419 * lower-left corner of the window. 420 * Note that the GL_ARB_fragment_coord_conventions extension will 421 * effect this someday. 
422 */ 423static void emit_ddy( struct st_translate *t, 424 struct ureg_dst dst, 425 const struct prog_src_register *SrcReg ) 426{ 427 struct ureg_program *ureg = t->ureg; 428 struct ureg_src src = translate_src( t, SrcReg ); 429 src = ureg_negate( src ); 430 ureg_DDY( ureg, dst, src ); 431} 432 433 434 435static unsigned 436translate_opcode( unsigned op ) 437{ 438 switch( op ) { 439 case OPCODE_ARL: 440 return TGSI_OPCODE_ARL; 441 case OPCODE_ABS: 442 return TGSI_OPCODE_ABS; 443 case OPCODE_ADD: 444 return TGSI_OPCODE_ADD; 445 case OPCODE_BGNLOOP: 446 return TGSI_OPCODE_BGNLOOP; 447 case OPCODE_BGNSUB: 448 return TGSI_OPCODE_BGNSUB; 449 case OPCODE_BRA: 450 return TGSI_OPCODE_BRA; 451 case OPCODE_BRK: 452 return TGSI_OPCODE_BRK; 453 case OPCODE_CAL: 454 return TGSI_OPCODE_CAL; 455 case OPCODE_CMP: 456 return TGSI_OPCODE_CMP; 457 case OPCODE_CONT: 458 return TGSI_OPCODE_CONT; 459 case OPCODE_COS: 460 return TGSI_OPCODE_COS; 461 case OPCODE_DDX: 462 return TGSI_OPCODE_DDX; 463 case OPCODE_DDY: 464 return TGSI_OPCODE_DDY; 465 case OPCODE_DP2: 466 return TGSI_OPCODE_DP2; 467 case OPCODE_DP2A: 468 return TGSI_OPCODE_DP2A; 469 case OPCODE_DP3: 470 return TGSI_OPCODE_DP3; 471 case OPCODE_DP4: 472 return TGSI_OPCODE_DP4; 473 case OPCODE_DPH: 474 return TGSI_OPCODE_DPH; 475 case OPCODE_DST: 476 return TGSI_OPCODE_DST; 477 case OPCODE_ELSE: 478 return TGSI_OPCODE_ELSE; 479 case OPCODE_ENDIF: 480 return TGSI_OPCODE_ENDIF; 481 case OPCODE_ENDLOOP: 482 return TGSI_OPCODE_ENDLOOP; 483 case OPCODE_ENDSUB: 484 return TGSI_OPCODE_ENDSUB; 485 case OPCODE_EX2: 486 return TGSI_OPCODE_EX2; 487 case OPCODE_EXP: 488 return TGSI_OPCODE_EXP; 489 case OPCODE_FLR: 490 return TGSI_OPCODE_FLR; 491 case OPCODE_FRC: 492 return TGSI_OPCODE_FRC; 493 case OPCODE_IF: 494 return TGSI_OPCODE_IF; 495 case OPCODE_TRUNC: 496 return TGSI_OPCODE_TRUNC; 497 case OPCODE_KIL: 498 return TGSI_OPCODE_KIL; 499 case OPCODE_KIL_NV: 500 return TGSI_OPCODE_KILP; 501 case OPCODE_LG2: 502 return TGSI_OPCODE_LG2; 503 
case OPCODE_LOG: 504 return TGSI_OPCODE_LOG; 505 case OPCODE_LIT: 506 return TGSI_OPCODE_LIT; 507 case OPCODE_LRP: 508 return TGSI_OPCODE_LRP; 509 case OPCODE_MAD: 510 return TGSI_OPCODE_MAD; 511 case OPCODE_MAX: 512 return TGSI_OPCODE_MAX; 513 case OPCODE_MIN: 514 return TGSI_OPCODE_MIN; 515 case OPCODE_MOV: 516 return TGSI_OPCODE_MOV; 517 case OPCODE_MUL: 518 return TGSI_OPCODE_MUL; 519 case OPCODE_NOP: 520 return TGSI_OPCODE_NOP; 521 case OPCODE_NRM3: 522 return TGSI_OPCODE_NRM; 523 case OPCODE_NRM4: 524 return TGSI_OPCODE_NRM4; 525 case OPCODE_POW: 526 return TGSI_OPCODE_POW; 527 case OPCODE_RCP: 528 return TGSI_OPCODE_RCP; 529 case OPCODE_RET: 530 return TGSI_OPCODE_RET; 531 case OPCODE_RSQ: 532 return TGSI_OPCODE_RSQ; 533 case OPCODE_SCS: 534 return TGSI_OPCODE_SCS; 535 case OPCODE_SEQ: 536 return TGSI_OPCODE_SEQ; 537 case OPCODE_SGE: 538 return TGSI_OPCODE_SGE; 539 case OPCODE_SGT: 540 return TGSI_OPCODE_SGT; 541 case OPCODE_SIN: 542 return TGSI_OPCODE_SIN; 543 case OPCODE_SLE: 544 return TGSI_OPCODE_SLE; 545 case OPCODE_SLT: 546 return TGSI_OPCODE_SLT; 547 case OPCODE_SNE: 548 return TGSI_OPCODE_SNE; 549 case OPCODE_SSG: 550 return TGSI_OPCODE_SSG; 551 case OPCODE_SUB: 552 return TGSI_OPCODE_SUB; 553 case OPCODE_TEX: 554 return TGSI_OPCODE_TEX; 555 case OPCODE_TXB: 556 return TGSI_OPCODE_TXB; 557 case OPCODE_TXD: 558 return TGSI_OPCODE_TXD; 559 case OPCODE_TXL: 560 return TGSI_OPCODE_TXL; 561 case OPCODE_TXP: 562 return TGSI_OPCODE_TXP; 563 case OPCODE_XPD: 564 return TGSI_OPCODE_XPD; 565 case OPCODE_END: 566 return TGSI_OPCODE_END; 567 default: 568 debug_assert( 0 ); 569 return TGSI_OPCODE_NOP; 570 } 571} 572 573 574static void 575compile_instruction( 576 struct st_translate *t, 577 const struct prog_instruction *inst ) 578{ 579 struct ureg_program *ureg = t->ureg; 580 GLuint i; 581 struct ureg_dst dst[1]; 582 struct ureg_src src[4]; 583 unsigned num_dst; 584 unsigned num_src; 585 586 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 587 num_src = 
_mesa_num_inst_src_regs( inst->Opcode ); 588 589 if (num_dst) 590 dst[0] = translate_dst( t, 591 &inst->DstReg, 592 inst->SaturateMode ); 593 594 for (i = 0; i < num_src; i++) 595 src[i] = translate_src( t, &inst->SrcReg[i] ); 596 597 switch( inst->Opcode ) { 598 case OPCODE_SWZ: 599 emit_swz( t, dst[0], &inst->SrcReg[0] ); 600 return; 601 602 case OPCODE_BGNLOOP: 603 case OPCODE_CAL: 604 case OPCODE_ELSE: 605 case OPCODE_ENDLOOP: 606 case OPCODE_IF: 607 debug_assert(num_dst == 0); 608 ureg_label_insn( ureg, 609 translate_opcode( inst->Opcode ), 610 src, num_src, 611 get_label( t, inst->BranchTarget )); 612 return; 613 614 case OPCODE_TEX: 615 case OPCODE_TXB: 616 case OPCODE_TXD: 617 case OPCODE_TXL: 618 case OPCODE_TXP: 619 src[num_src++] = t->samplers[inst->TexSrcUnit]; 620 ureg_tex_insn( ureg, 621 translate_opcode( inst->Opcode ), 622 dst, num_dst, 623 translate_texture_target( inst->TexSrcTarget, 624 inst->TexShadow ), 625 src, num_src ); 626 return; 627 628 case OPCODE_SCS: 629 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 630 ureg_insn( ureg, 631 translate_opcode( inst->Opcode ), 632 dst, num_dst, 633 src, num_src ); 634 break; 635 636 case OPCODE_XPD: 637 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 638 ureg_insn( ureg, 639 translate_opcode( inst->Opcode ), 640 dst, num_dst, 641 src, num_src ); 642 break; 643 644 case OPCODE_NOISE1: 645 case OPCODE_NOISE2: 646 case OPCODE_NOISE3: 647 case OPCODE_NOISE4: 648 /* At some point, a motivated person could add a better 649 * implementation of noise. Currently not even the nvidia 650 * binary drivers do anything more than this. In any case, the 651 * place to do this is in the GL state tracker, not the poor 652 * driver. 
653 */ 654 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 655 break; 656 657 case OPCODE_DDY: 658 emit_ddy( t, dst[0], &inst->SrcReg[0] ); 659 break; 660 661 default: 662 ureg_insn( ureg, 663 translate_opcode( inst->Opcode ), 664 dst, num_dst, 665 src, num_src ); 666 break; 667 } 668} 669 670 671/** 672 * Emit the TGSI instructions for inverting the WPOS y coordinate. 673 */ 674static void 675emit_inverted_wpos( struct st_translate *t, 676 const struct gl_program *program ) 677{ 678 struct ureg_program *ureg = t->ureg; 679 680 /* Fragment program uses fragment position input. 681 * Need to replace instances of INPUT[WPOS] with temp T 682 * where T = INPUT[WPOS] by y is inverted. 683 */ 684 static const gl_state_index winSizeState[STATE_LENGTH] 685 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; 686 687 /* XXX: note we are modifying the incoming shader here! Need to 688 * do this before emitting the constant decls below, or this 689 * will be missed: 690 */ 691 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, 692 winSizeState); 693 694 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); 695 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); 696 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 697 698 /* MOV wpos_temp, input[wpos] 699 */ 700 ureg_MOV( ureg, wpos_temp, wpos_input ); 701 702 /* SUB wpos_temp.y, winsize_const, wpos_input 703 */ 704 ureg_SUB( ureg, 705 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 706 winsize, 707 wpos_input); 708 709 /* Use wpos_temp as position input from here on: 710 */ 711 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 712} 713 714 715/** 716 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 717 * TGSI uses +1 for front, -1 for back. 718 * This function converts the TGSI value to the GL value. Simply clamping/ 719 * saturating the value to [0,1] does the job. 
720 */ 721static void 722emit_face_var( struct st_translate *t, 723 const struct gl_program *program ) 724{ 725 struct ureg_program *ureg = t->ureg; 726 struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); 727 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 728 729 /* MOV_SAT face_temp, input[face] 730 */ 731 face_temp = ureg_saturate( face_temp ); 732 ureg_MOV( ureg, face_temp, face_input ); 733 734 /* Use face_temp as face input from here on: 735 */ 736 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 737} 738 739static void 740emit_edgeflags( struct st_translate *t, 741 const struct gl_program *program ) 742{ 743 struct ureg_program *ureg = t->ureg; 744 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 745 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 746 747 ureg_MOV( ureg, edge_dst, edge_src ); 748} 749 750/** 751 * Translate Mesa program to TGSI format. 752 * \param program the program to translate 753 * \param numInputs number of input registers used 754 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 755 * input indexes 756 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 757 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 758 * each input 759 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 760 * \param numOutputs number of output registers used 761 * \param outputMapping maps Mesa fragment program outputs to TGSI 762 * generic outputs 763 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 764 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 765 * each output 766 * 767 * \return array of translated tokens, caller's responsibility to free 768 */ 769enum pipe_error 770st_translate_mesa_program( 771 GLcontext *ctx, 772 uint procType, 773 struct ureg_program *ureg, 774 const struct gl_program *program, 775 GLuint numInputs, 776 
const GLuint inputMapping[], 777 const ubyte inputSemanticName[], 778 const ubyte inputSemanticIndex[], 779 const GLuint interpMode[], 780 GLuint numOutputs, 781 const GLuint outputMapping[], 782 const ubyte outputSemanticName[], 783 const ubyte outputSemanticIndex[], 784 boolean passthrough_edgeflags ) 785{ 786 struct st_translate translate, *t; 787 unsigned i; 788 789 t = &translate; 790 memset(t, 0, sizeof *t); 791 792 t->procType = procType; 793 t->inputMapping = inputMapping; 794 t->outputMapping = outputMapping; 795 t->ureg = ureg; 796 t->psizoutindex = -1; 797 t->prevInstWrotePsiz = GL_FALSE; 798 799 /*_mesa_print_program(program);*/ 800 801 /* 802 * Declare input attributes. 803 */ 804 if (procType == TGSI_PROCESSOR_FRAGMENT) { 805 for (i = 0; i < numInputs; i++) { 806 t->inputs[i] = ureg_DECL_fs_input(ureg, 807 inputSemanticName[i], 808 inputSemanticIndex[i], 809 interpMode[i]); 810 } 811 812 if (program->InputsRead & FRAG_BIT_WPOS) { 813 /* Must do this after setting up t->inputs, and before 814 * emitting constant references, below: 815 */ 816 emit_inverted_wpos( t, program ); 817 } 818 819 if (program->InputsRead & FRAG_BIT_FACE) { 820 emit_face_var( t, program ); 821 } 822 823 /* 824 * Declare output attributes. 
825 */ 826 for (i = 0; i < numOutputs; i++) { 827 switch (outputSemanticName[i]) { 828 case TGSI_SEMANTIC_POSITION: 829 t->outputs[i] = ureg_DECL_output( ureg, 830 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 831 outputSemanticIndex[i] ); 832 833 t->outputs[i] = ureg_writemask( t->outputs[i], 834 TGSI_WRITEMASK_Z ); 835 break; 836 case TGSI_SEMANTIC_COLOR: 837 t->outputs[i] = ureg_DECL_output( ureg, 838 TGSI_SEMANTIC_COLOR, 839 outputSemanticIndex[i] ); 840 break; 841 default: 842 debug_assert(0); 843 return 0; 844 } 845 } 846 } 847 else { 848 for (i = 0; i < numInputs; i++) { 849 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 850 } 851 852 for (i = 0; i < numOutputs; i++) { 853 t->outputs[i] = ureg_DECL_output( ureg, 854 outputSemanticName[i], 855 outputSemanticIndex[i] ); 856 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) { 857 static const gl_state_index pointSizeClampState[STATE_LENGTH] 858 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 }; 859 /* XXX: note we are modifying the incoming shader here! Need to 860 * do this before emitting the constant decls below, or this 861 * will be missed: 862 */ 863 unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters, 864 pointSizeClampState); 865 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 866 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 867 t->psizregreal = t->outputs[i]; 868 t->psizoutindex = i; 869 t->outputs[i] = psizregtemp; 870 } 871 } 872 if (passthrough_edgeflags) 873 emit_edgeflags( t, program ); 874 } 875 876 /* Declare address register. 877 */ 878 if (program->NumAddressRegs > 0) { 879 debug_assert( program->NumAddressRegs == 1 ); 880 t->address[0] = ureg_DECL_address( ureg ); 881 } 882 883 884 /* Emit constants and immediates. Mesa uses a single index space 885 * for these, so we put all the translated regs in t->constants. 
886 */ 887 if (program->Parameters) { 888 889 t->constants = CALLOC( program->Parameters->NumParameters, 890 sizeof t->constants[0] ); 891 if (t->constants == NULL) 892 goto out; 893 894 for (i = 0; i < program->Parameters->NumParameters; i++) { 895 switch (program->Parameters->Parameters[i].Type) { 896 case PROGRAM_ENV_PARAM: 897 case PROGRAM_LOCAL_PARAM: 898 case PROGRAM_STATE_VAR: 899 case PROGRAM_NAMED_PARAM: 900 case PROGRAM_UNIFORM: 901 t->constants[i] = ureg_DECL_constant( ureg, i ); 902 break; 903 904 /* Emit immediates only when there is no address register 905 * in use. FIXME: Be smarter and recognize param arrays: 906 * indirect addressing is only valid within the referenced 907 * array. 908 */ 909 case PROGRAM_CONSTANT: 910 if (program->NumAddressRegs > 0) 911 t->constants[i] = ureg_DECL_constant( ureg, i ); 912 else 913 t->constants[i] = 914 ureg_DECL_immediate( ureg, 915 program->Parameters->ParameterValues[i], 916 4 ); 917 break; 918 default: 919 break; 920 } 921 } 922 } 923 924 /* texture samplers */ 925 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 926 if (program->SamplersUsed & (1 << i)) { 927 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 928 } 929 } 930 931 /* Emit each instruction in turn: 932 */ 933 for (i = 0; i < program->NumInstructions; i++) { 934 set_insn_start( t, ureg_get_instruction_number( ureg )); 935 compile_instruction( t, &program->Instructions[i] ); 936 937 /* note can't do that easily at the end of prog due to 938 possible early return */ 939 if (t->prevInstWrotePsiz && program->Id) { 940 set_insn_start( t, ureg_get_instruction_number( ureg )); 941 ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X), 942 ureg_src(t->outputs[t->psizoutindex]), 943 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 944 ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X), 945 ureg_src(t->outputs[t->psizoutindex]), 946 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 947 } 948 t->prevInstWrotePsiz = GL_FALSE; 949 } 
950 951 /* Fix up all emitted labels: 952 */ 953 for (i = 0; i < t->labels_count; i++) { 954 ureg_fixup_label( ureg, 955 t->labels[i].token, 956 t->insn[t->labels[i].branch_target] ); 957 } 958 959 return PIPE_OK; 960 961out: 962 FREE(t->insn); 963 FREE(t->labels); 964 FREE(t->constants); 965 966 if (t->error) { 967 debug_printf("%s: translate error flag set\n", __FUNCTION__); 968 } 969 970 return PIPE_ERROR_OUT_OF_MEMORY; 971} 972 973 974/** 975 * Tokens cannot be free with _mesa_free otherwise the builtin gallium 976 * malloc debugging will get confused. 977 */ 978void 979st_free_tokens(const struct tgsi_token *tokens) 980{ 981 FREE((void *)tokens); 982} 983