/* st_mesa_to_tgsi.c revision 3b2bdde1b2ee93f77c01f5a94ebb7778192c15f8 */
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * \author 30 * Michal Krol, 31 * Keith Whitwell 32 */ 33 34#include "pipe/p_compiler.h" 35#include "pipe/p_shader_tokens.h" 36#include "pipe/p_state.h" 37#include "pipe/p_context.h" 38#include "tgsi/tgsi_ureg.h" 39#include "st_mesa_to_tgsi.h" 40#include "st_context.h" 41#include "shader/prog_instruction.h" 42#include "shader/prog_parameter.h" 43#include "util/u_debug.h" 44#include "util/u_math.h" 45#include "util/u_memory.h" 46 47struct label { 48 unsigned branch_target; 49 unsigned token; 50}; 51 52 53/** 54 * Intermediate state used during shader translation. 
55 */ 56struct st_translate { 57 struct ureg_program *ureg; 58 59 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 60 struct ureg_src *constants; 61 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 62 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 63 struct ureg_dst address[1]; 64 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 65 struct ureg_dst psizregreal; 66 struct ureg_src pointSizeConst; 67 GLint psizoutindex; 68 GLboolean prevInstWrotePsiz; 69 70 const GLuint *inputMapping; 71 const GLuint *outputMapping; 72 73 /* For every instruction that contains a label (eg CALL), keep 74 * details so that we can go back afterwards and emit the correct 75 * tgsi instruction number for each label. 76 */ 77 struct label *labels; 78 unsigned labels_size; 79 unsigned labels_count; 80 81 /* Keep a record of the tgsi instruction number that each mesa 82 * instruction starts at, will be used to fix up labels after 83 * translation. 84 */ 85 unsigned *insn; 86 unsigned insn_size; 87 unsigned insn_count; 88 89 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 90 91 boolean error; 92}; 93 94 95static unsigned *get_label( struct st_translate *t, 96 unsigned branch_target ) 97{ 98 unsigned i; 99 100 if (t->labels_count + 1 >= t->labels_size) { 101 unsigned old_size = t->labels_size; 102 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 103 t->labels = REALLOC( t->labels, 104 old_size * sizeof t->labels[0], 105 t->labels_size * sizeof t->labels[0] ); 106 if (t->labels == NULL) { 107 static unsigned dummy; 108 t->error = TRUE; 109 return &dummy; 110 } 111 } 112 113 i = t->labels_count++; 114 t->labels[i].branch_target = branch_target; 115 return &t->labels[i].token; 116} 117 118 119static void set_insn_start( struct st_translate *t, 120 unsigned start ) 121{ 122 if (t->insn_count + 1 >= t->insn_size) { 123 unsigned old_size = t->insn_size; 124 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 125 t->insn = REALLOC( t->insn, 126 old_size * sizeof t->insn[0], 127 
t->insn_size * sizeof t->insn[0] ); 128 if (t->insn == NULL) { 129 t->error = TRUE; 130 return; 131 } 132 } 133 134 t->insn[t->insn_count++] = start; 135} 136 137 138/* 139 * Map mesa register file to TGSI register file. 140 */ 141static struct ureg_dst 142dst_register( struct st_translate *t, 143 gl_register_file file, 144 GLuint index ) 145{ 146 switch( file ) { 147 case PROGRAM_UNDEFINED: 148 return ureg_dst_undef(); 149 150 case PROGRAM_TEMPORARY: 151 if (ureg_dst_is_undef(t->temps[index])) 152 t->temps[index] = ureg_DECL_temporary( t->ureg ); 153 154 return t->temps[index]; 155 156 case PROGRAM_OUTPUT: 157 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 158 t->prevInstWrotePsiz = GL_TRUE; 159 160 if (t->procType == TGSI_PROCESSOR_VERTEX) 161 assert(index < VERT_RESULT_MAX); 162 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 163 assert(index < FRAG_RESULT_MAX); 164 else 165 assert(0 && "geom shaders not handled in dst_register() yet"); 166 167 assert(t->outputMapping[index] < Elements(t->outputs)); 168 169 return t->outputs[t->outputMapping[index]]; 170 171 case PROGRAM_ADDRESS: 172 return t->address[index]; 173 174 default: 175 debug_assert( 0 ); 176 return ureg_dst_undef(); 177 } 178} 179 180 181static struct ureg_src 182src_register( struct st_translate *t, 183 gl_register_file file, 184 GLint index ) 185{ 186 switch( file ) { 187 case PROGRAM_UNDEFINED: 188 return ureg_src_undef(); 189 190 case PROGRAM_TEMPORARY: 191 ASSERT(index >= 0); 192 if (ureg_dst_is_undef(t->temps[index])) 193 t->temps[index] = ureg_DECL_temporary( t->ureg ); 194 assert(index < Elements(t->temps)); 195 return ureg_src(t->temps[index]); 196 197 case PROGRAM_NAMED_PARAM: 198 case PROGRAM_ENV_PARAM: 199 case PROGRAM_LOCAL_PARAM: 200 case PROGRAM_UNIFORM: 201 ASSERT(index >= 0); 202 return t->constants[index]; 203 case PROGRAM_STATE_VAR: 204 case PROGRAM_CONSTANT: /* ie, immediate */ 205 if (index < 0) 206 return ureg_DECL_constant( t->ureg, 0 ); 207 else 208 
return t->constants[index]; 209 210 case PROGRAM_INPUT: 211 assert(t->inputMapping[index] < Elements(t->inputs)); 212 return t->inputs[t->inputMapping[index]]; 213 214 case PROGRAM_OUTPUT: 215 assert(t->outputMapping[index] < Elements(t->outputs)); 216 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 217 218 case PROGRAM_ADDRESS: 219 return ureg_src(t->address[index]); 220 221 default: 222 debug_assert( 0 ); 223 return ureg_src_undef(); 224 } 225} 226 227 228/** 229 * Map mesa texture target to TGSI texture target. 230 */ 231static unsigned 232translate_texture_target( GLuint textarget, 233 GLboolean shadow ) 234{ 235 if (shadow) { 236 switch( textarget ) { 237 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; 238 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; 239 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; 240 default: break; 241 } 242 } 243 244 switch( textarget ) { 245 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; 246 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; 247 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; 248 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; 249 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; 250 default: 251 debug_assert( 0 ); 252 return TGSI_TEXTURE_1D; 253 } 254} 255 256 257static struct ureg_dst 258translate_dst( struct st_translate *t, 259 const struct prog_dst_register *DstReg, 260 boolean saturate ) 261{ 262 struct ureg_dst dst = dst_register( t, 263 DstReg->File, 264 DstReg->Index ); 265 266 dst = ureg_writemask( dst, 267 DstReg->WriteMask ); 268 269 if (saturate) 270 dst = ureg_saturate( dst ); 271 272 if (DstReg->RelAddr) 273 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 274 275 return dst; 276} 277 278 279static struct ureg_src 280translate_src( struct st_translate *t, 281 const struct prog_src_register *SrcReg ) 282{ 283 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 284 285 src = ureg_swizzle( src, 286 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 
287 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 288 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 289 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 290 291 if (SrcReg->Negate == NEGATE_XYZW) 292 src = ureg_negate(src); 293 294 if (SrcReg->Abs) 295 src = ureg_abs(src); 296 297 if (SrcReg->RelAddr) { 298 src = ureg_src_indirect( src, ureg_src(t->address[0])); 299 /* If SrcReg->Index was negative, it was set to zero in 300 * src_register(). Reassign it now. 301 */ 302 src.Index = SrcReg->Index; 303 } 304 305 return src; 306} 307 308 309static struct ureg_src swizzle_4v( struct ureg_src src, 310 const unsigned *swz ) 311{ 312 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 313} 314 315 316/** 317 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: 318 * 319 * SWZ dst, src.x-y10 320 * 321 * becomes: 322 * 323 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 324 */ 325static void emit_swz( struct st_translate *t, 326 struct ureg_dst dst, 327 const struct prog_src_register *SrcReg ) 328{ 329 struct ureg_program *ureg = t->ureg; 330 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 331 332 unsigned negate_mask = SrcReg->Negate; 333 334 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 335 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 336 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 337 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 338 339 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 340 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 341 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 342 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 343 344 unsigned negative_one_mask = one_mask & negate_mask; 345 unsigned positive_one_mask = one_mask & ~negate_mask; 346 347 struct ureg_src imm; 348 unsigned i; 349 unsigned mul_swizzle[4] = {0,0,0,0}; 350 unsigned add_swizzle[4] = {0,0,0,0}; 351 unsigned src_swizzle[4] = {0,0,0,0}; 352 boolean need_add = FALSE; 353 boolean need_mul = 
FALSE; 354 355 if (dst.WriteMask == 0) 356 return; 357 358 /* Is this just a MOV? 359 */ 360 if (zero_mask == 0 && 361 one_mask == 0 && 362 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 363 { 364 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 365 return; 366 } 367 368#define IMM_ZERO 0 369#define IMM_ONE 1 370#define IMM_NEG_ONE 2 371 372 imm = ureg_imm3f( ureg, 0, 1, -1 ); 373 374 for (i = 0; i < 4; i++) { 375 unsigned bit = 1 << i; 376 377 if (dst.WriteMask & bit) { 378 if (positive_one_mask & bit) { 379 mul_swizzle[i] = IMM_ZERO; 380 add_swizzle[i] = IMM_ONE; 381 need_add = TRUE; 382 } 383 else if (negative_one_mask & bit) { 384 mul_swizzle[i] = IMM_ZERO; 385 add_swizzle[i] = IMM_NEG_ONE; 386 need_add = TRUE; 387 } 388 else if (zero_mask & bit) { 389 mul_swizzle[i] = IMM_ZERO; 390 add_swizzle[i] = IMM_ZERO; 391 need_add = TRUE; 392 } 393 else { 394 add_swizzle[i] = IMM_ZERO; 395 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 396 need_mul = TRUE; 397 if (negate_mask & bit) { 398 mul_swizzle[i] = IMM_NEG_ONE; 399 } 400 else { 401 mul_swizzle[i] = IMM_ONE; 402 } 403 } 404 } 405 } 406 407 if (need_mul && need_add) { 408 ureg_MAD( ureg, 409 dst, 410 swizzle_4v( src, src_swizzle ), 411 swizzle_4v( imm, mul_swizzle ), 412 swizzle_4v( imm, add_swizzle ) ); 413 } 414 else if (need_mul) { 415 ureg_MUL( ureg, 416 dst, 417 swizzle_4v( src, src_swizzle ), 418 swizzle_4v( imm, mul_swizzle ) ); 419 } 420 else if (need_add) { 421 ureg_MOV( ureg, 422 dst, 423 swizzle_4v( imm, add_swizzle ) ); 424 } 425 else { 426 debug_assert(0); 427 } 428 429#undef IMM_ZERO 430#undef IMM_ONE 431#undef IMM_NEG_ONE 432} 433 434 435/** 436 * Negate the value of DDY to match GL semantics where (0,0) is the 437 * lower-left corner of the window. 438 * Note that the GL_ARB_fragment_coord_conventions extension will 439 * effect this someday. 
440 */ 441static void emit_ddy( struct st_translate *t, 442 struct ureg_dst dst, 443 const struct prog_src_register *SrcReg ) 444{ 445 struct ureg_program *ureg = t->ureg; 446 struct ureg_src src = translate_src( t, SrcReg ); 447 src = ureg_negate( src ); 448 ureg_DDY( ureg, dst, src ); 449} 450 451 452 453static unsigned 454translate_opcode( unsigned op ) 455{ 456 switch( op ) { 457 case OPCODE_ARL: 458 return TGSI_OPCODE_ARL; 459 case OPCODE_ABS: 460 return TGSI_OPCODE_ABS; 461 case OPCODE_ADD: 462 return TGSI_OPCODE_ADD; 463 case OPCODE_BGNLOOP: 464 return TGSI_OPCODE_BGNLOOP; 465 case OPCODE_BGNSUB: 466 return TGSI_OPCODE_BGNSUB; 467 case OPCODE_BRA: 468 return TGSI_OPCODE_BRA; 469 case OPCODE_BRK: 470 return TGSI_OPCODE_BRK; 471 case OPCODE_CAL: 472 return TGSI_OPCODE_CAL; 473 case OPCODE_CMP: 474 return TGSI_OPCODE_CMP; 475 case OPCODE_CONT: 476 return TGSI_OPCODE_CONT; 477 case OPCODE_COS: 478 return TGSI_OPCODE_COS; 479 case OPCODE_DDX: 480 return TGSI_OPCODE_DDX; 481 case OPCODE_DDY: 482 return TGSI_OPCODE_DDY; 483 case OPCODE_DP2: 484 return TGSI_OPCODE_DP2; 485 case OPCODE_DP2A: 486 return TGSI_OPCODE_DP2A; 487 case OPCODE_DP3: 488 return TGSI_OPCODE_DP3; 489 case OPCODE_DP4: 490 return TGSI_OPCODE_DP4; 491 case OPCODE_DPH: 492 return TGSI_OPCODE_DPH; 493 case OPCODE_DST: 494 return TGSI_OPCODE_DST; 495 case OPCODE_ELSE: 496 return TGSI_OPCODE_ELSE; 497 case OPCODE_ENDIF: 498 return TGSI_OPCODE_ENDIF; 499 case OPCODE_ENDLOOP: 500 return TGSI_OPCODE_ENDLOOP; 501 case OPCODE_ENDSUB: 502 return TGSI_OPCODE_ENDSUB; 503 case OPCODE_EX2: 504 return TGSI_OPCODE_EX2; 505 case OPCODE_EXP: 506 return TGSI_OPCODE_EXP; 507 case OPCODE_FLR: 508 return TGSI_OPCODE_FLR; 509 case OPCODE_FRC: 510 return TGSI_OPCODE_FRC; 511 case OPCODE_IF: 512 return TGSI_OPCODE_IF; 513 case OPCODE_TRUNC: 514 return TGSI_OPCODE_TRUNC; 515 case OPCODE_KIL: 516 return TGSI_OPCODE_KIL; 517 case OPCODE_KIL_NV: 518 return TGSI_OPCODE_KILP; 519 case OPCODE_LG2: 520 return TGSI_OPCODE_LG2; 521 
case OPCODE_LOG: 522 return TGSI_OPCODE_LOG; 523 case OPCODE_LIT: 524 return TGSI_OPCODE_LIT; 525 case OPCODE_LRP: 526 return TGSI_OPCODE_LRP; 527 case OPCODE_MAD: 528 return TGSI_OPCODE_MAD; 529 case OPCODE_MAX: 530 return TGSI_OPCODE_MAX; 531 case OPCODE_MIN: 532 return TGSI_OPCODE_MIN; 533 case OPCODE_MOV: 534 return TGSI_OPCODE_MOV; 535 case OPCODE_MUL: 536 return TGSI_OPCODE_MUL; 537 case OPCODE_NOP: 538 return TGSI_OPCODE_NOP; 539 case OPCODE_NRM3: 540 return TGSI_OPCODE_NRM; 541 case OPCODE_NRM4: 542 return TGSI_OPCODE_NRM4; 543 case OPCODE_POW: 544 return TGSI_OPCODE_POW; 545 case OPCODE_RCP: 546 return TGSI_OPCODE_RCP; 547 case OPCODE_RET: 548 return TGSI_OPCODE_RET; 549 case OPCODE_RSQ: 550 return TGSI_OPCODE_RSQ; 551 case OPCODE_SCS: 552 return TGSI_OPCODE_SCS; 553 case OPCODE_SEQ: 554 return TGSI_OPCODE_SEQ; 555 case OPCODE_SGE: 556 return TGSI_OPCODE_SGE; 557 case OPCODE_SGT: 558 return TGSI_OPCODE_SGT; 559 case OPCODE_SIN: 560 return TGSI_OPCODE_SIN; 561 case OPCODE_SLE: 562 return TGSI_OPCODE_SLE; 563 case OPCODE_SLT: 564 return TGSI_OPCODE_SLT; 565 case OPCODE_SNE: 566 return TGSI_OPCODE_SNE; 567 case OPCODE_SSG: 568 return TGSI_OPCODE_SSG; 569 case OPCODE_SUB: 570 return TGSI_OPCODE_SUB; 571 case OPCODE_TEX: 572 return TGSI_OPCODE_TEX; 573 case OPCODE_TXB: 574 return TGSI_OPCODE_TXB; 575 case OPCODE_TXD: 576 return TGSI_OPCODE_TXD; 577 case OPCODE_TXL: 578 return TGSI_OPCODE_TXL; 579 case OPCODE_TXP: 580 return TGSI_OPCODE_TXP; 581 case OPCODE_XPD: 582 return TGSI_OPCODE_XPD; 583 case OPCODE_END: 584 return TGSI_OPCODE_END; 585 default: 586 debug_assert( 0 ); 587 return TGSI_OPCODE_NOP; 588 } 589} 590 591 592static void 593compile_instruction( 594 struct st_translate *t, 595 const struct prog_instruction *inst ) 596{ 597 struct ureg_program *ureg = t->ureg; 598 GLuint i; 599 struct ureg_dst dst[1]; 600 struct ureg_src src[4]; 601 unsigned num_dst; 602 unsigned num_src; 603 604 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 605 num_src = 
_mesa_num_inst_src_regs( inst->Opcode ); 606 607 if (num_dst) 608 dst[0] = translate_dst( t, 609 &inst->DstReg, 610 inst->SaturateMode ); 611 612 for (i = 0; i < num_src; i++) 613 src[i] = translate_src( t, &inst->SrcReg[i] ); 614 615 switch( inst->Opcode ) { 616 case OPCODE_SWZ: 617 emit_swz( t, dst[0], &inst->SrcReg[0] ); 618 return; 619 620 case OPCODE_BGNLOOP: 621 case OPCODE_CAL: 622 case OPCODE_ELSE: 623 case OPCODE_ENDLOOP: 624 case OPCODE_IF: 625 debug_assert(num_dst == 0); 626 ureg_label_insn( ureg, 627 translate_opcode( inst->Opcode ), 628 src, num_src, 629 get_label( t, inst->BranchTarget )); 630 return; 631 632 case OPCODE_TEX: 633 case OPCODE_TXB: 634 case OPCODE_TXD: 635 case OPCODE_TXL: 636 case OPCODE_TXP: 637 src[num_src++] = t->samplers[inst->TexSrcUnit]; 638 ureg_tex_insn( ureg, 639 translate_opcode( inst->Opcode ), 640 dst, num_dst, 641 translate_texture_target( inst->TexSrcTarget, 642 inst->TexShadow ), 643 src, num_src ); 644 return; 645 646 case OPCODE_SCS: 647 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 648 ureg_insn( ureg, 649 translate_opcode( inst->Opcode ), 650 dst, num_dst, 651 src, num_src ); 652 break; 653 654 case OPCODE_XPD: 655 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 656 ureg_insn( ureg, 657 translate_opcode( inst->Opcode ), 658 dst, num_dst, 659 src, num_src ); 660 break; 661 662 case OPCODE_NOISE1: 663 case OPCODE_NOISE2: 664 case OPCODE_NOISE3: 665 case OPCODE_NOISE4: 666 /* At some point, a motivated person could add a better 667 * implementation of noise. Currently not even the nvidia 668 * binary drivers do anything more than this. In any case, the 669 * place to do this is in the GL state tracker, not the poor 670 * driver. 
671 */ 672 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 673 break; 674 675 case OPCODE_DDY: 676 emit_ddy( t, dst[0], &inst->SrcReg[0] ); 677 break; 678 679 default: 680 ureg_insn( ureg, 681 translate_opcode( inst->Opcode ), 682 dst, num_dst, 683 src, num_src ); 684 break; 685 } 686} 687 688/** 689 * Emit the TGSI instructions to adjust the WPOS pixel center convention 690 */ 691static void 692emit_adjusted_wpos( struct st_translate *t, 693 const struct gl_program *program, GLfloat value) 694{ 695 struct ureg_program *ureg = t->ureg; 696 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 697 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 698 699 ureg_ADD(ureg, ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y), 700 wpos_input, ureg_imm1f(ureg, value)); 701 702 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 703} 704 705/** 706 * Emit the TGSI instructions for inverting the WPOS y coordinate. 707 */ 708static void 709emit_inverted_wpos( struct st_translate *t, 710 const struct gl_program *program ) 711{ 712 struct ureg_program *ureg = t->ureg; 713 714 /* Fragment program uses fragment position input. 715 * Need to replace instances of INPUT[WPOS] with temp T 716 * where T = INPUT[WPOS] by y is inverted. 717 */ 718 static const gl_state_index winSizeState[STATE_LENGTH] 719 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; 720 721 /* XXX: note we are modifying the incoming shader here! 
Need to 722 * do this before emitting the constant decls below, or this 723 * will be missed: 724 */ 725 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, 726 winSizeState); 727 728 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); 729 struct ureg_dst wpos_temp; 730 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 731 732 /* MOV wpos_temp, input[wpos] 733 */ 734 if (wpos_input.File == TGSI_FILE_TEMPORARY) 735 wpos_temp = ureg_dst(wpos_input); 736 else { 737 wpos_temp = ureg_DECL_temporary( ureg ); 738 ureg_MOV( ureg, wpos_temp, wpos_input ); 739 } 740 741 /* SUB wpos_temp.y, winsize_const, wpos_input 742 */ 743 ureg_SUB( ureg, 744 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 745 winsize, 746 wpos_input); 747 748 /* Use wpos_temp as position input from here on: 749 */ 750 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 751} 752 753 754/** 755 * Emit fragment position/ooordinate code. 756 */ 757static void 758emit_wpos(struct st_context *st, 759 struct st_translate *t, 760 const struct gl_program *program, 761 struct ureg_program *ureg) 762{ 763 const struct gl_fragment_program *fp = 764 (const struct gl_fragment_program *) program; 765 struct pipe_screen *pscreen = st->pipe->screen; 766 boolean invert = FALSE; 767 768 if (fp->OriginUpperLeft) { 769 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 770 } 771 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 772 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 773 invert = TRUE; 774 } 775 else 776 assert(0); 777 } 778 else { 779 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 780 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 781 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 782 invert = TRUE; 783 else 784 assert(0); 785 } 786 787 if (fp->PixelCenterInteger) { 788 if 
(pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 789 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 790 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 791 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f); 792 else 793 assert(0); 794 } 795 else { 796 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 797 } 798 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 799 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 800 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f); 801 } 802 else 803 assert(0); 804 } 805 806 /* we invert after adjustment so that we avoid the MOV to temporary, 807 * and reuse the adjustment ADD instead */ 808 if (invert) 809 emit_inverted_wpos(t, program); 810} 811 812 813/** 814 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 815 * TGSI uses +1 for front, -1 for back. 816 * This function converts the TGSI value to the GL value. Simply clamping/ 817 * saturating the value to [0,1] does the job. 
818 */ 819static void 820emit_face_var( struct st_translate *t, 821 const struct gl_program *program ) 822{ 823 struct ureg_program *ureg = t->ureg; 824 struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); 825 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 826 827 /* MOV_SAT face_temp, input[face] 828 */ 829 face_temp = ureg_saturate( face_temp ); 830 ureg_MOV( ureg, face_temp, face_input ); 831 832 /* Use face_temp as face input from here on: 833 */ 834 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 835} 836 837 838static void 839emit_edgeflags( struct st_translate *t, 840 const struct gl_program *program ) 841{ 842 struct ureg_program *ureg = t->ureg; 843 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 844 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 845 846 ureg_MOV( ureg, edge_dst, edge_src ); 847} 848 849 850/** 851 * Translate Mesa program to TGSI format. 852 * \param program the program to translate 853 * \param numInputs number of input registers used 854 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 855 * input indexes 856 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 857 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 858 * each input 859 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 860 * \param numOutputs number of output registers used 861 * \param outputMapping maps Mesa fragment program outputs to TGSI 862 * generic outputs 863 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 864 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 865 * each output 866 * 867 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 868 */ 869enum pipe_error 870st_translate_mesa_program( 871 GLcontext *ctx, 872 uint procType, 873 struct ureg_program *ureg, 874 const struct gl_program *program, 875 GLuint numInputs, 876 const GLuint 
inputMapping[], 877 const ubyte inputSemanticName[], 878 const ubyte inputSemanticIndex[], 879 const GLuint interpMode[], 880 GLuint numOutputs, 881 const GLuint outputMapping[], 882 const ubyte outputSemanticName[], 883 const ubyte outputSemanticIndex[], 884 boolean passthrough_edgeflags ) 885{ 886 struct st_translate translate, *t; 887 unsigned i; 888 enum pipe_error ret = PIPE_OK; 889 890 t = &translate; 891 memset(t, 0, sizeof *t); 892 893 t->procType = procType; 894 t->inputMapping = inputMapping; 895 t->outputMapping = outputMapping; 896 t->ureg = ureg; 897 t->psizoutindex = -1; 898 t->prevInstWrotePsiz = GL_FALSE; 899 900 /*_mesa_print_program(program);*/ 901 902 /* 903 * Declare input attributes. 904 */ 905 if (procType == TGSI_PROCESSOR_FRAGMENT) { 906 for (i = 0; i < numInputs; i++) { 907 if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) { 908 t->inputs[i] = ureg_DECL_fs_input_cyl(ureg, 909 inputSemanticName[i], 910 inputSemanticIndex[i], 911 interpMode[i], 912 TGSI_CYLINDRICAL_WRAP_X); 913 } 914 else { 915 t->inputs[i] = ureg_DECL_fs_input(ureg, 916 inputSemanticName[i], 917 inputSemanticIndex[i], 918 interpMode[i]); 919 } 920 } 921 922 if (program->InputsRead & FRAG_BIT_WPOS) { 923 /* Must do this after setting up t->inputs, and before 924 * emitting constant references, below: 925 */ 926 emit_wpos(st_context(ctx), t, program, ureg); 927 } 928 929 if (program->InputsRead & FRAG_BIT_FACE) { 930 emit_face_var( t, program ); 931 } 932 933 /* 934 * Declare output attributes. 
935 */ 936 for (i = 0; i < numOutputs; i++) { 937 switch (outputSemanticName[i]) { 938 case TGSI_SEMANTIC_POSITION: 939 t->outputs[i] = ureg_DECL_output( ureg, 940 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 941 outputSemanticIndex[i] ); 942 943 t->outputs[i] = ureg_writemask( t->outputs[i], 944 TGSI_WRITEMASK_Z ); 945 break; 946 case TGSI_SEMANTIC_COLOR: 947 t->outputs[i] = ureg_DECL_output( ureg, 948 TGSI_SEMANTIC_COLOR, 949 outputSemanticIndex[i] ); 950 break; 951 default: 952 debug_assert(0); 953 return 0; 954 } 955 } 956 } 957 else { 958 for (i = 0; i < numInputs; i++) { 959 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 960 } 961 962 for (i = 0; i < numOutputs; i++) { 963 t->outputs[i] = ureg_DECL_output( ureg, 964 outputSemanticName[i], 965 outputSemanticIndex[i] ); 966 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) { 967 static const gl_state_index pointSizeClampState[STATE_LENGTH] 968 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 }; 969 /* XXX: note we are modifying the incoming shader here! Need to 970 * do this before emitting the constant decls below, or this 971 * will be missed: 972 */ 973 unsigned pointSizeClampConst = 974 _mesa_add_state_reference(program->Parameters, 975 pointSizeClampState); 976 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 977 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 978 t->psizregreal = t->outputs[i]; 979 t->psizoutindex = i; 980 t->outputs[i] = psizregtemp; 981 } 982 } 983 if (passthrough_edgeflags) 984 emit_edgeflags( t, program ); 985 } 986 987 /* Declare address register. 988 */ 989 if (program->NumAddressRegs > 0) { 990 debug_assert( program->NumAddressRegs == 1 ); 991 t->address[0] = ureg_DECL_address( ureg ); 992 } 993 994 /* Emit constants and immediates. Mesa uses a single index space 995 * for these, so we put all the translated regs in t->constants. 
996 */ 997 if (program->Parameters) { 998 t->constants = CALLOC( program->Parameters->NumParameters, 999 sizeof t->constants[0] ); 1000 if (t->constants == NULL) { 1001 ret = PIPE_ERROR_OUT_OF_MEMORY; 1002 goto out; 1003 } 1004 1005 for (i = 0; i < program->Parameters->NumParameters; i++) { 1006 switch (program->Parameters->Parameters[i].Type) { 1007 case PROGRAM_ENV_PARAM: 1008 case PROGRAM_LOCAL_PARAM: 1009 case PROGRAM_STATE_VAR: 1010 case PROGRAM_NAMED_PARAM: 1011 case PROGRAM_UNIFORM: 1012 t->constants[i] = ureg_DECL_constant( ureg, i ); 1013 break; 1014 1015 /* Emit immediates only when there is no address register 1016 * in use. FIXME: Be smarter and recognize param arrays: 1017 * indirect addressing is only valid within the referenced 1018 * array. 1019 */ 1020 case PROGRAM_CONSTANT: 1021 if (program->NumAddressRegs > 0) 1022 t->constants[i] = ureg_DECL_constant( ureg, i ); 1023 else 1024 t->constants[i] = 1025 ureg_DECL_immediate( ureg, 1026 program->Parameters->ParameterValues[i], 1027 4 ); 1028 break; 1029 default: 1030 break; 1031 } 1032 } 1033 } 1034 1035 /* texture samplers */ 1036 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 1037 if (program->SamplersUsed & (1 << i)) { 1038 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 1039 } 1040 } 1041 1042 /* Emit each instruction in turn: 1043 */ 1044 for (i = 0; i < program->NumInstructions; i++) { 1045 set_insn_start( t, ureg_get_instruction_number( ureg )); 1046 compile_instruction( t, &program->Instructions[i] ); 1047 1048 /* note can't do that easily at the end of prog due to 1049 possible early return */ 1050 if (t->prevInstWrotePsiz && program->Id) { 1051 set_insn_start( t, ureg_get_instruction_number( ureg )); 1052 ureg_MAX( t->ureg, 1053 ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X), 1054 ureg_src(t->outputs[t->psizoutindex]), 1055 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 1056 ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X), 1057 
ureg_src(t->outputs[t->psizoutindex]), 1058 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 1059 } 1060 t->prevInstWrotePsiz = GL_FALSE; 1061 } 1062 1063 /* Fix up all emitted labels: 1064 */ 1065 for (i = 0; i < t->labels_count; i++) { 1066 ureg_fixup_label( ureg, 1067 t->labels[i].token, 1068 t->insn[t->labels[i].branch_target] ); 1069 } 1070 1071out: 1072 FREE(t->insn); 1073 FREE(t->labels); 1074 FREE(t->constants); 1075 1076 if (t->error) { 1077 debug_printf("%s: translate error flag set\n", __FUNCTION__); 1078 } 1079 1080 return ret; 1081} 1082 1083 1084/** 1085 * Tokens cannot be free with free otherwise the builtin gallium 1086 * malloc debugging will get confused. 1087 */ 1088void 1089st_free_tokens(const struct tgsi_token *tokens) 1090{ 1091 FREE((void *)tokens); 1092} 1093