st_mesa_to_tgsi.c revision 1491c6aa2de17760ab157a3fe71e45006e4eecf6
/**************************************************************************
 * 
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 **************************************************************************/

/*
 * \author
 * Michal Krol,
 * Keith Whitwell
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "tgsi/tgsi_ureg.h"
#include "st_mesa_to_tgsi.h"
#include "st_context.h"
#include "program/prog_instruction.h"
#include "program/prog_parameter.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"

/**
 * One pending branch fix-up.
 *
 * branch_target is the Mesa instruction index taken from the instruction's
 * BranchTarget field; after translation it is used to index
 * st_translate::insn.  token is the slot whose address is handed to
 * ureg_label_insn() and whose value is later passed to ureg_fixup_label().
 */
struct label {
   unsigned branch_target;
   unsigned token;
};


/**
 * Intermediate state used during shader translation.
 *
 * The whole structure is cleared with memset() before use, so every
 * register slot starts out in the all-zero state.
 */
struct st_translate {
   struct ureg_program *ureg;   /**< TGSI program being built */

   /* Translated registers, one array per register file.  temps[] entries
    * are declared lazily the first time a temporary is referenced;
    * constants is heap-allocated with one entry per program parameter.
    */
   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
   struct ureg_src *constants;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[1];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];

   /* Extra info for handling point size clamping in vertex shader */
   struct ureg_dst pointSizeResult; /**< Actual point size output register */
   struct ureg_src pointSizeConst;  /**< Point size range constant register */
   GLint pointSizeOutIndex;         /**< Temp point size output register */
   GLboolean prevInstWrotePointSize;

   /* Caller-supplied tables mapping Mesa input/output indexes to slots in
    * inputs[] / outputs[].
    */
   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;    /**< allocated entries in labels[] */
   unsigned labels_count;   /**< used entries in labels[] */

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;      /**< allocated entries in insn[] */
   unsigned insn_count;     /**< used entries in insn[] */

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT/GEOMETRY */

   boolean error;      /**< set when growing labels[] or insn[] fails */
};


/**
 * Make note of a branch to a label in the TGSI code.
 * After we've emitted all instructions, we'll go over the list
 * of labels built here and patch the TGSI code with the actual
 * location of each label.
102 */ 103static unsigned *get_label( struct st_translate *t, 104 unsigned branch_target ) 105{ 106 unsigned i; 107 108 if (t->labels_count + 1 >= t->labels_size) { 109 unsigned old_size = t->labels_size; 110 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 111 t->labels = REALLOC( t->labels, 112 old_size * sizeof t->labels[0], 113 t->labels_size * sizeof t->labels[0] ); 114 if (t->labels == NULL) { 115 static unsigned dummy; 116 t->error = TRUE; 117 return &dummy; 118 } 119 } 120 121 i = t->labels_count++; 122 t->labels[i].branch_target = branch_target; 123 return &t->labels[i].token; 124} 125 126 127/** 128 * Called prior to emitting the TGSI code for each Mesa instruction. 129 * Allocate additional space for instructions if needed. 130 * Update the insn[] array so the next Mesa instruction points to 131 * the next TGSI instruction. 132 */ 133static void set_insn_start( struct st_translate *t, 134 unsigned start ) 135{ 136 if (t->insn_count + 1 >= t->insn_size) { 137 unsigned old_size = t->insn_size; 138 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 139 t->insn = REALLOC( t->insn, 140 old_size * sizeof t->insn[0], 141 t->insn_size * sizeof t->insn[0] ); 142 if (t->insn == NULL) { 143 t->error = TRUE; 144 return; 145 } 146 } 147 148 t->insn[t->insn_count++] = start; 149} 150 151 152/** 153 * Map a Mesa dst register to a TGSI ureg_dst register. 
154 */ 155static struct ureg_dst 156dst_register( struct st_translate *t, 157 gl_register_file file, 158 GLuint index ) 159{ 160 switch( file ) { 161 case PROGRAM_UNDEFINED: 162 return ureg_dst_undef(); 163 164 case PROGRAM_TEMPORARY: 165 if (ureg_dst_is_undef(t->temps[index])) 166 t->temps[index] = ureg_DECL_temporary( t->ureg ); 167 168 return t->temps[index]; 169 170 case PROGRAM_OUTPUT: 171 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 172 t->prevInstWrotePointSize = GL_TRUE; 173 174 if (t->procType == TGSI_PROCESSOR_VERTEX) 175 assert(index < VERT_RESULT_MAX); 176 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 177 assert(index < FRAG_RESULT_MAX); 178 else 179 assert(index < GEOM_RESULT_MAX); 180 181 assert(t->outputMapping[index] < Elements(t->outputs)); 182 183 return t->outputs[t->outputMapping[index]]; 184 185 case PROGRAM_ADDRESS: 186 return t->address[index]; 187 188 default: 189 debug_assert( 0 ); 190 return ureg_dst_undef(); 191 } 192} 193 194 195/** 196 * Map a Mesa src register to a TGSI ureg_src register. 
197 */ 198static struct ureg_src 199src_register( struct st_translate *t, 200 gl_register_file file, 201 GLint index ) 202{ 203 switch( file ) { 204 case PROGRAM_UNDEFINED: 205 return ureg_src_undef(); 206 207 case PROGRAM_TEMPORARY: 208 ASSERT(index >= 0); 209 if (ureg_dst_is_undef(t->temps[index])) 210 t->temps[index] = ureg_DECL_temporary( t->ureg ); 211 assert(index < Elements(t->temps)); 212 return ureg_src(t->temps[index]); 213 214 case PROGRAM_NAMED_PARAM: 215 case PROGRAM_ENV_PARAM: 216 case PROGRAM_LOCAL_PARAM: 217 case PROGRAM_UNIFORM: 218 ASSERT(index >= 0); 219 return t->constants[index]; 220 case PROGRAM_STATE_VAR: 221 case PROGRAM_CONSTANT: /* ie, immediate */ 222 if (index < 0) 223 return ureg_DECL_constant( t->ureg, 0 ); 224 else 225 return t->constants[index]; 226 227 case PROGRAM_INPUT: 228 assert(t->inputMapping[index] < Elements(t->inputs)); 229 return t->inputs[t->inputMapping[index]]; 230 231 case PROGRAM_OUTPUT: 232 assert(t->outputMapping[index] < Elements(t->outputs)); 233 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 234 235 case PROGRAM_ADDRESS: 236 return ureg_src(t->address[index]); 237 238 default: 239 debug_assert( 0 ); 240 return ureg_src_undef(); 241 } 242} 243 244 245/** 246 * Map mesa texture target to TGSI texture target. 
247 */ 248static unsigned 249translate_texture_target( GLuint textarget, 250 GLboolean shadow ) 251{ 252 if (shadow) { 253 switch( textarget ) { 254 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; 255 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; 256 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; 257 default: break; 258 } 259 } 260 261 switch( textarget ) { 262 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; 263 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; 264 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; 265 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; 266 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; 267 default: 268 debug_assert( 0 ); 269 return TGSI_TEXTURE_1D; 270 } 271} 272 273 274/** 275 * Create a TGSI ureg_dst register from a Mesa dest register. 276 */ 277static struct ureg_dst 278translate_dst( struct st_translate *t, 279 const struct prog_dst_register *DstReg, 280 boolean saturate ) 281{ 282 struct ureg_dst dst = dst_register( t, 283 DstReg->File, 284 DstReg->Index ); 285 286 dst = ureg_writemask( dst, 287 DstReg->WriteMask ); 288 289 if (saturate) 290 dst = ureg_saturate( dst ); 291 292 if (DstReg->RelAddr) 293 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 294 295 return dst; 296} 297 298 299/** 300 * Create a TGSI ureg_src register from a Mesa src register. 
301 */ 302static struct ureg_src 303translate_src( struct st_translate *t, 304 const struct prog_src_register *SrcReg ) 305{ 306 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 307 308 if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) { 309 src = src_register( t, SrcReg->File, SrcReg->Index2 ); 310 if (SrcReg->RelAddr2) 311 src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]), 312 SrcReg->Index); 313 else 314 src = ureg_src_dimension( src, SrcReg->Index); 315 } 316 317 src = ureg_swizzle( src, 318 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 319 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 320 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 321 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 322 323 if (SrcReg->Negate == NEGATE_XYZW) 324 src = ureg_negate(src); 325 326 if (SrcReg->Abs) 327 src = ureg_abs(src); 328 329 if (SrcReg->RelAddr) { 330 src = ureg_src_indirect( src, ureg_src(t->address[0])); 331 if (SrcReg->File != PROGRAM_INPUT && 332 SrcReg->File != PROGRAM_OUTPUT) { 333 /* If SrcReg->Index was negative, it was set to zero in 334 * src_register(). Reassign it now. But don't do this 335 * for input/output regs since they get remapped while 336 * const buffers don't. 337 */ 338 src.Index = SrcReg->Index; 339 } 340 } 341 342 return src; 343} 344 345 346static struct ureg_src swizzle_4v( struct ureg_src src, 347 const unsigned *swz ) 348{ 349 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 350} 351 352 353/** 354 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. 
EG: 355 * 356 * SWZ dst, src.x-y10 357 * 358 * becomes: 359 * 360 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 361 */ 362static void emit_swz( struct st_translate *t, 363 struct ureg_dst dst, 364 const struct prog_src_register *SrcReg ) 365{ 366 struct ureg_program *ureg = t->ureg; 367 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 368 369 unsigned negate_mask = SrcReg->Negate; 370 371 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 372 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 373 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 374 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 375 376 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 377 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 378 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 379 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 380 381 unsigned negative_one_mask = one_mask & negate_mask; 382 unsigned positive_one_mask = one_mask & ~negate_mask; 383 384 struct ureg_src imm; 385 unsigned i; 386 unsigned mul_swizzle[4] = {0,0,0,0}; 387 unsigned add_swizzle[4] = {0,0,0,0}; 388 unsigned src_swizzle[4] = {0,0,0,0}; 389 boolean need_add = FALSE; 390 boolean need_mul = FALSE; 391 392 if (dst.WriteMask == 0) 393 return; 394 395 /* Is this just a MOV? 
396 */ 397 if (zero_mask == 0 && 398 one_mask == 0 && 399 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 400 { 401 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 402 return; 403 } 404 405#define IMM_ZERO 0 406#define IMM_ONE 1 407#define IMM_NEG_ONE 2 408 409 imm = ureg_imm3f( ureg, 0, 1, -1 ); 410 411 for (i = 0; i < 4; i++) { 412 unsigned bit = 1 << i; 413 414 if (dst.WriteMask & bit) { 415 if (positive_one_mask & bit) { 416 mul_swizzle[i] = IMM_ZERO; 417 add_swizzle[i] = IMM_ONE; 418 need_add = TRUE; 419 } 420 else if (negative_one_mask & bit) { 421 mul_swizzle[i] = IMM_ZERO; 422 add_swizzle[i] = IMM_NEG_ONE; 423 need_add = TRUE; 424 } 425 else if (zero_mask & bit) { 426 mul_swizzle[i] = IMM_ZERO; 427 add_swizzle[i] = IMM_ZERO; 428 need_add = TRUE; 429 } 430 else { 431 add_swizzle[i] = IMM_ZERO; 432 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 433 need_mul = TRUE; 434 if (negate_mask & bit) { 435 mul_swizzle[i] = IMM_NEG_ONE; 436 } 437 else { 438 mul_swizzle[i] = IMM_ONE; 439 } 440 } 441 } 442 } 443 444 if (need_mul && need_add) { 445 ureg_MAD( ureg, 446 dst, 447 swizzle_4v( src, src_swizzle ), 448 swizzle_4v( imm, mul_swizzle ), 449 swizzle_4v( imm, add_swizzle ) ); 450 } 451 else if (need_mul) { 452 ureg_MUL( ureg, 453 dst, 454 swizzle_4v( src, src_swizzle ), 455 swizzle_4v( imm, mul_swizzle ) ); 456 } 457 else if (need_add) { 458 ureg_MOV( ureg, 459 dst, 460 swizzle_4v( imm, add_swizzle ) ); 461 } 462 else { 463 debug_assert(0); 464 } 465 466#undef IMM_ZERO 467#undef IMM_ONE 468#undef IMM_NEG_ONE 469} 470 471 472/** 473 * Negate the value of DDY to match GL semantics where (0,0) is the 474 * lower-left corner of the window. 475 * Note that the GL_ARB_fragment_coord_conventions extension will 476 * effect this someday. 
477 */ 478static void emit_ddy( struct st_translate *t, 479 struct ureg_dst dst, 480 const struct prog_src_register *SrcReg ) 481{ 482 struct ureg_program *ureg = t->ureg; 483 struct ureg_src src = translate_src( t, SrcReg ); 484 src = ureg_negate( src ); 485 ureg_DDY( ureg, dst, src ); 486} 487 488 489 490static unsigned 491translate_opcode( unsigned op ) 492{ 493 switch( op ) { 494 case OPCODE_ARL: 495 return TGSI_OPCODE_ARL; 496 case OPCODE_ABS: 497 return TGSI_OPCODE_ABS; 498 case OPCODE_ADD: 499 return TGSI_OPCODE_ADD; 500 case OPCODE_BGNLOOP: 501 return TGSI_OPCODE_BGNLOOP; 502 case OPCODE_BGNSUB: 503 return TGSI_OPCODE_BGNSUB; 504 case OPCODE_BRA: 505 return TGSI_OPCODE_BRA; 506 case OPCODE_BRK: 507 return TGSI_OPCODE_BRK; 508 case OPCODE_CAL: 509 return TGSI_OPCODE_CAL; 510 case OPCODE_CMP: 511 return TGSI_OPCODE_CMP; 512 case OPCODE_CONT: 513 return TGSI_OPCODE_CONT; 514 case OPCODE_COS: 515 return TGSI_OPCODE_COS; 516 case OPCODE_DDX: 517 return TGSI_OPCODE_DDX; 518 case OPCODE_DDY: 519 return TGSI_OPCODE_DDY; 520 case OPCODE_DP2: 521 return TGSI_OPCODE_DP2; 522 case OPCODE_DP2A: 523 return TGSI_OPCODE_DP2A; 524 case OPCODE_DP3: 525 return TGSI_OPCODE_DP3; 526 case OPCODE_DP4: 527 return TGSI_OPCODE_DP4; 528 case OPCODE_DPH: 529 return TGSI_OPCODE_DPH; 530 case OPCODE_DST: 531 return TGSI_OPCODE_DST; 532 case OPCODE_ELSE: 533 return TGSI_OPCODE_ELSE; 534 case OPCODE_EMIT_VERTEX: 535 return TGSI_OPCODE_EMIT; 536 case OPCODE_END_PRIMITIVE: 537 return TGSI_OPCODE_ENDPRIM; 538 case OPCODE_ENDIF: 539 return TGSI_OPCODE_ENDIF; 540 case OPCODE_ENDLOOP: 541 return TGSI_OPCODE_ENDLOOP; 542 case OPCODE_ENDSUB: 543 return TGSI_OPCODE_ENDSUB; 544 case OPCODE_EX2: 545 return TGSI_OPCODE_EX2; 546 case OPCODE_EXP: 547 return TGSI_OPCODE_EXP; 548 case OPCODE_FLR: 549 return TGSI_OPCODE_FLR; 550 case OPCODE_FRC: 551 return TGSI_OPCODE_FRC; 552 case OPCODE_IF: 553 return TGSI_OPCODE_IF; 554 case OPCODE_TRUNC: 555 return TGSI_OPCODE_TRUNC; 556 case OPCODE_KIL: 557 return 
TGSI_OPCODE_KIL; 558 case OPCODE_KIL_NV: 559 return TGSI_OPCODE_KILP; 560 case OPCODE_LG2: 561 return TGSI_OPCODE_LG2; 562 case OPCODE_LOG: 563 return TGSI_OPCODE_LOG; 564 case OPCODE_LIT: 565 return TGSI_OPCODE_LIT; 566 case OPCODE_LRP: 567 return TGSI_OPCODE_LRP; 568 case OPCODE_MAD: 569 return TGSI_OPCODE_MAD; 570 case OPCODE_MAX: 571 return TGSI_OPCODE_MAX; 572 case OPCODE_MIN: 573 return TGSI_OPCODE_MIN; 574 case OPCODE_MOV: 575 return TGSI_OPCODE_MOV; 576 case OPCODE_MUL: 577 return TGSI_OPCODE_MUL; 578 case OPCODE_NOP: 579 return TGSI_OPCODE_NOP; 580 case OPCODE_NRM3: 581 return TGSI_OPCODE_NRM; 582 case OPCODE_NRM4: 583 return TGSI_OPCODE_NRM4; 584 case OPCODE_POW: 585 return TGSI_OPCODE_POW; 586 case OPCODE_RCP: 587 return TGSI_OPCODE_RCP; 588 case OPCODE_RET: 589 return TGSI_OPCODE_RET; 590 case OPCODE_RSQ: 591 return TGSI_OPCODE_RSQ; 592 case OPCODE_SCS: 593 return TGSI_OPCODE_SCS; 594 case OPCODE_SEQ: 595 return TGSI_OPCODE_SEQ; 596 case OPCODE_SGE: 597 return TGSI_OPCODE_SGE; 598 case OPCODE_SGT: 599 return TGSI_OPCODE_SGT; 600 case OPCODE_SIN: 601 return TGSI_OPCODE_SIN; 602 case OPCODE_SLE: 603 return TGSI_OPCODE_SLE; 604 case OPCODE_SLT: 605 return TGSI_OPCODE_SLT; 606 case OPCODE_SNE: 607 return TGSI_OPCODE_SNE; 608 case OPCODE_SSG: 609 return TGSI_OPCODE_SSG; 610 case OPCODE_SUB: 611 return TGSI_OPCODE_SUB; 612 case OPCODE_TEX: 613 return TGSI_OPCODE_TEX; 614 case OPCODE_TXB: 615 return TGSI_OPCODE_TXB; 616 case OPCODE_TXD: 617 return TGSI_OPCODE_TXD; 618 case OPCODE_TXL: 619 return TGSI_OPCODE_TXL; 620 case OPCODE_TXP: 621 return TGSI_OPCODE_TXP; 622 case OPCODE_XPD: 623 return TGSI_OPCODE_XPD; 624 case OPCODE_END: 625 return TGSI_OPCODE_END; 626 default: 627 debug_assert( 0 ); 628 return TGSI_OPCODE_NOP; 629 } 630} 631 632 633static void 634compile_instruction( 635 struct st_translate *t, 636 const struct prog_instruction *inst ) 637{ 638 struct ureg_program *ureg = t->ureg; 639 GLuint i; 640 struct ureg_dst dst[1]; 641 struct ureg_src src[4]; 
642 unsigned num_dst; 643 unsigned num_src; 644 645 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 646 num_src = _mesa_num_inst_src_regs( inst->Opcode ); 647 648 if (num_dst) 649 dst[0] = translate_dst( t, 650 &inst->DstReg, 651 inst->SaturateMode ); 652 653 for (i = 0; i < num_src; i++) 654 src[i] = translate_src( t, &inst->SrcReg[i] ); 655 656 switch( inst->Opcode ) { 657 case OPCODE_SWZ: 658 emit_swz( t, dst[0], &inst->SrcReg[0] ); 659 return; 660 661 case OPCODE_BGNLOOP: 662 case OPCODE_CAL: 663 case OPCODE_ELSE: 664 case OPCODE_ENDLOOP: 665 case OPCODE_IF: 666 debug_assert(num_dst == 0); 667 ureg_label_insn( ureg, 668 translate_opcode( inst->Opcode ), 669 src, num_src, 670 get_label( t, inst->BranchTarget )); 671 return; 672 673 case OPCODE_TEX: 674 case OPCODE_TXB: 675 case OPCODE_TXD: 676 case OPCODE_TXL: 677 case OPCODE_TXP: 678 src[num_src++] = t->samplers[inst->TexSrcUnit]; 679 ureg_tex_insn( ureg, 680 translate_opcode( inst->Opcode ), 681 dst, num_dst, 682 translate_texture_target( inst->TexSrcTarget, 683 inst->TexShadow ), 684 src, num_src ); 685 return; 686 687 case OPCODE_SCS: 688 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 689 ureg_insn( ureg, 690 translate_opcode( inst->Opcode ), 691 dst, num_dst, 692 src, num_src ); 693 break; 694 695 case OPCODE_XPD: 696 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 697 ureg_insn( ureg, 698 translate_opcode( inst->Opcode ), 699 dst, num_dst, 700 src, num_src ); 701 break; 702 703 case OPCODE_NOISE1: 704 case OPCODE_NOISE2: 705 case OPCODE_NOISE3: 706 case OPCODE_NOISE4: 707 /* At some point, a motivated person could add a better 708 * implementation of noise. Currently not even the nvidia 709 * binary drivers do anything more than this. In any case, the 710 * place to do this is in the GL state tracker, not the poor 711 * driver. 
712 */ 713 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 714 break; 715 716 case OPCODE_DDY: 717 emit_ddy( t, dst[0], &inst->SrcReg[0] ); 718 break; 719 720 default: 721 ureg_insn( ureg, 722 translate_opcode( inst->Opcode ), 723 dst, num_dst, 724 src, num_src ); 725 break; 726 } 727} 728 729 730/** 731 * Emit the TGSI instructions to adjust the WPOS pixel center convention 732 */ 733static void 734emit_adjusted_wpos( struct st_translate *t, 735 const struct gl_program *program, GLfloat value) 736{ 737 struct ureg_program *ureg = t->ureg; 738 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 739 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 740 741 ureg_ADD(ureg, 742 ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y), 743 wpos_input, ureg_imm1f(ureg, value)); 744 745 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 746} 747 748 749/** 750 * Emit the TGSI instructions for inverting the WPOS y coordinate. 751 */ 752static void 753emit_inverted_wpos( struct st_translate *t, 754 const struct gl_program *program ) 755{ 756 struct ureg_program *ureg = t->ureg; 757 758 /* Fragment program uses fragment position input. 759 * Need to replace instances of INPUT[WPOS] with temp T 760 * where T = INPUT[WPOS] by y is inverted. 761 */ 762 static const gl_state_index winSizeState[STATE_LENGTH] 763 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; 764 765 /* XXX: note we are modifying the incoming shader here! 
Need to 766 * do this before emitting the constant decls below, or this 767 * will be missed: 768 */ 769 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, 770 winSizeState); 771 772 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); 773 struct ureg_dst wpos_temp; 774 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 775 776 /* MOV wpos_temp, input[wpos] 777 */ 778 if (wpos_input.File == TGSI_FILE_TEMPORARY) 779 wpos_temp = ureg_dst(wpos_input); 780 else { 781 wpos_temp = ureg_DECL_temporary( ureg ); 782 ureg_MOV( ureg, wpos_temp, wpos_input ); 783 } 784 785 /* SUB wpos_temp.y, winsize_const, wpos_input 786 */ 787 ureg_SUB( ureg, 788 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 789 winsize, 790 wpos_input); 791 792 /* Use wpos_temp as position input from here on: 793 */ 794 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 795} 796 797 798/** 799 * Emit fragment position/ooordinate code. 800 */ 801static void 802emit_wpos(struct st_context *st, 803 struct st_translate *t, 804 const struct gl_program *program, 805 struct ureg_program *ureg) 806{ 807 const struct gl_fragment_program *fp = 808 (const struct gl_fragment_program *) program; 809 struct pipe_screen *pscreen = st->pipe->screen; 810 boolean invert = FALSE; 811 812 if (fp->OriginUpperLeft) { 813 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 814 } 815 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 816 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 817 invert = TRUE; 818 } 819 else 820 assert(0); 821 } 822 else { 823 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 824 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 825 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 826 invert = TRUE; 827 else 828 assert(0); 829 } 830 831 if (fp->PixelCenterInteger) { 832 if 
(pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 833 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 834 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 835 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f); 836 else 837 assert(0); 838 } 839 else { 840 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 841 } 842 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 843 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 844 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f); 845 } 846 else 847 assert(0); 848 } 849 850 /* we invert after adjustment so that we avoid the MOV to temporary, 851 * and reuse the adjustment ADD instead */ 852 if (invert) 853 emit_inverted_wpos(t, program); 854} 855 856 857/** 858 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 859 * TGSI uses +1 for front, -1 for back. 860 * This function converts the TGSI value to the GL value. Simply clamping/ 861 * saturating the value to [0,1] does the job. 
862 */ 863static void 864emit_face_var( struct st_translate *t, 865 const struct gl_program *program ) 866{ 867 struct ureg_program *ureg = t->ureg; 868 struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); 869 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 870 871 /* MOV_SAT face_temp, input[face] 872 */ 873 face_temp = ureg_saturate( face_temp ); 874 ureg_MOV( ureg, face_temp, face_input ); 875 876 /* Use face_temp as face input from here on: 877 */ 878 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 879} 880 881 882static void 883emit_edgeflags( struct st_translate *t, 884 const struct gl_program *program ) 885{ 886 struct ureg_program *ureg = t->ureg; 887 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 888 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 889 890 ureg_MOV( ureg, edge_dst, edge_src ); 891} 892 893 894/** 895 * Translate Mesa program to TGSI format. 896 * \param program the program to translate 897 * \param numInputs number of input registers used 898 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 899 * input indexes 900 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 901 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 902 * each input 903 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 904 * \param numOutputs number of output registers used 905 * \param outputMapping maps Mesa fragment program outputs to TGSI 906 * generic outputs 907 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 908 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 909 * each output 910 * 911 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 912 */ 913enum pipe_error 914st_translate_mesa_program( 915 GLcontext *ctx, 916 uint procType, 917 struct ureg_program *ureg, 918 const struct gl_program *program, 919 GLuint numInputs, 920 const GLuint 
inputMapping[], 921 const ubyte inputSemanticName[], 922 const ubyte inputSemanticIndex[], 923 const GLuint interpMode[], 924 GLuint numOutputs, 925 const GLuint outputMapping[], 926 const ubyte outputSemanticName[], 927 const ubyte outputSemanticIndex[], 928 boolean passthrough_edgeflags ) 929{ 930 struct st_translate translate, *t; 931 unsigned i; 932 enum pipe_error ret = PIPE_OK; 933 934 assert(numInputs <= Elements(t->inputs)); 935 assert(numOutputs <= Elements(t->outputs)); 936 937 t = &translate; 938 memset(t, 0, sizeof *t); 939 940 t->procType = procType; 941 t->inputMapping = inputMapping; 942 t->outputMapping = outputMapping; 943 t->ureg = ureg; 944 t->pointSizeOutIndex = -1; 945 t->prevInstWrotePointSize = GL_FALSE; 946 947 /*_mesa_print_program(program);*/ 948 949 /* 950 * Declare input attributes. 951 */ 952 if (procType == TGSI_PROCESSOR_FRAGMENT) { 953 for (i = 0; i < numInputs; i++) { 954 if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) { 955 t->inputs[i] = ureg_DECL_fs_input_cyl(ureg, 956 inputSemanticName[i], 957 inputSemanticIndex[i], 958 interpMode[i], 959 TGSI_CYLINDRICAL_WRAP_X); 960 } 961 else { 962 t->inputs[i] = ureg_DECL_fs_input(ureg, 963 inputSemanticName[i], 964 inputSemanticIndex[i], 965 interpMode[i]); 966 } 967 } 968 969 if (program->InputsRead & FRAG_BIT_WPOS) { 970 /* Must do this after setting up t->inputs, and before 971 * emitting constant references, below: 972 */ 973 emit_wpos(st_context(ctx), t, program, ureg); 974 } 975 976 if (program->InputsRead & FRAG_BIT_FACE) { 977 emit_face_var( t, program ); 978 } 979 980 /* 981 * Declare output attributes. 
982 */ 983 for (i = 0; i < numOutputs; i++) { 984 switch (outputSemanticName[i]) { 985 case TGSI_SEMANTIC_POSITION: 986 t->outputs[i] = ureg_DECL_output( ureg, 987 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 988 outputSemanticIndex[i] ); 989 990 t->outputs[i] = ureg_writemask( t->outputs[i], 991 TGSI_WRITEMASK_Z ); 992 break; 993 case TGSI_SEMANTIC_COLOR: 994 t->outputs[i] = ureg_DECL_output( ureg, 995 TGSI_SEMANTIC_COLOR, 996 outputSemanticIndex[i] ); 997 break; 998 default: 999 debug_assert(0); 1000 return 0; 1001 } 1002 } 1003 } 1004 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 1005 for (i = 0; i < numInputs; i++) { 1006 t->inputs[i] = ureg_DECL_gs_input(ureg, 1007 i, 1008 inputSemanticName[i], 1009 inputSemanticIndex[i]); 1010 } 1011 1012 for (i = 0; i < numOutputs; i++) { 1013 t->outputs[i] = ureg_DECL_output( ureg, 1014 outputSemanticName[i], 1015 outputSemanticIndex[i] ); 1016 } 1017 } 1018 else { 1019 assert(procType == TGSI_PROCESSOR_VERTEX); 1020 1021 for (i = 0; i < numInputs; i++) { 1022 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 1023 } 1024 1025 for (i = 0; i < numOutputs; i++) { 1026 t->outputs[i] = ureg_DECL_output( ureg, 1027 outputSemanticName[i], 1028 outputSemanticIndex[i] ); 1029 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) { 1030 /* Writing to the point size result register requires special 1031 * handling to implement clamping. 1032 */ 1033 static const gl_state_index pointSizeClampState[STATE_LENGTH] 1034 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 }; 1035 /* XXX: note we are modifying the incoming shader here! 
Need to 1036 * do this before emitting the constant decls below, or this 1037 * will be missed: 1038 */ 1039 unsigned pointSizeClampConst = 1040 _mesa_add_state_reference(program->Parameters, 1041 pointSizeClampState); 1042 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 1043 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 1044 t->pointSizeResult = t->outputs[i]; 1045 t->pointSizeOutIndex = i; 1046 t->outputs[i] = psizregtemp; 1047 } 1048 } 1049 if (passthrough_edgeflags) 1050 emit_edgeflags( t, program ); 1051 } 1052 1053 /* Declare address register. 1054 */ 1055 if (program->NumAddressRegs > 0) { 1056 debug_assert( program->NumAddressRegs == 1 ); 1057 t->address[0] = ureg_DECL_address( ureg ); 1058 } 1059 1060 /* Emit constants and immediates. Mesa uses a single index space 1061 * for these, so we put all the translated regs in t->constants. 1062 */ 1063 if (program->Parameters) { 1064 t->constants = CALLOC( program->Parameters->NumParameters, 1065 sizeof t->constants[0] ); 1066 if (t->constants == NULL) { 1067 ret = PIPE_ERROR_OUT_OF_MEMORY; 1068 goto out; 1069 } 1070 1071 for (i = 0; i < program->Parameters->NumParameters; i++) { 1072 switch (program->Parameters->Parameters[i].Type) { 1073 case PROGRAM_ENV_PARAM: 1074 case PROGRAM_LOCAL_PARAM: 1075 case PROGRAM_STATE_VAR: 1076 case PROGRAM_NAMED_PARAM: 1077 case PROGRAM_UNIFORM: 1078 t->constants[i] = ureg_DECL_constant( ureg, i ); 1079 break; 1080 1081 /* Emit immediates only when there is no address register 1082 * in use. FIXME: Be smarter and recognize param arrays: 1083 * indirect addressing is only valid within the referenced 1084 * array. 
1085 */ 1086 case PROGRAM_CONSTANT: 1087 if (program->NumAddressRegs > 0) 1088 t->constants[i] = ureg_DECL_constant( ureg, i ); 1089 else 1090 t->constants[i] = 1091 ureg_DECL_immediate( ureg, 1092 program->Parameters->ParameterValues[i], 1093 4 ); 1094 break; 1095 default: 1096 break; 1097 } 1098 } 1099 } 1100 1101 /* texture samplers */ 1102 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 1103 if (program->SamplersUsed & (1 << i)) { 1104 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 1105 } 1106 } 1107 1108 /* Emit each instruction in turn: 1109 */ 1110 for (i = 0; i < program->NumInstructions; i++) { 1111 set_insn_start( t, ureg_get_instruction_number( ureg )); 1112 compile_instruction( t, &program->Instructions[i] ); 1113 1114 if (t->prevInstWrotePointSize && program->Id) { 1115 /* The previous instruction wrote to the (fake) vertex point size 1116 * result register. Now we need to clamp that value to the min/max 1117 * point size range, putting the result into the real point size 1118 * register. 1119 * Note that we can't do this easily at the end of program due to 1120 * possible early return. 
1121 */ 1122 set_insn_start( t, ureg_get_instruction_number( ureg )); 1123 ureg_MAX( t->ureg, 1124 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 1125 ureg_src(t->outputs[t->pointSizeOutIndex]), 1126 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 1127 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 1128 ureg_src(t->outputs[t->pointSizeOutIndex]), 1129 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 1130 } 1131 t->prevInstWrotePointSize = GL_FALSE; 1132 } 1133 1134 /* Fix up all emitted labels: 1135 */ 1136 for (i = 0; i < t->labels_count; i++) { 1137 ureg_fixup_label( ureg, 1138 t->labels[i].token, 1139 t->insn[t->labels[i].branch_target] ); 1140 } 1141 1142out: 1143 FREE(t->insn); 1144 FREE(t->labels); 1145 FREE(t->constants); 1146 1147 if (t->error) { 1148 debug_printf("%s: translate error flag set\n", __FUNCTION__); 1149 } 1150 1151 return ret; 1152} 1153 1154 1155/** 1156 * Tokens cannot be free with free otherwise the builtin gallium 1157 * malloc debugging will get confused. 1158 */ 1159void 1160st_free_tokens(const struct tgsi_token *tokens) 1161{ 1162 FREE((void *)tokens); 1163} 1164