st_mesa_to_tgsi.c revision d531f9c2f5c78468d913fc509b223760ac1c1124
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * \author 30 * Michal Krol, 31 * Keith Whitwell 32 */ 33 34#include "pipe/p_compiler.h" 35#include "pipe/p_context.h" 36#include "pipe/p_screen.h" 37#include "pipe/p_shader_tokens.h" 38#include "pipe/p_state.h" 39#include "tgsi/tgsi_ureg.h" 40#include "st_mesa_to_tgsi.h" 41#include "st_context.h" 42#include "program/prog_instruction.h" 43#include "program/prog_parameter.h" 44#include "util/u_debug.h" 45#include "util/u_math.h" 46#include "util/u_memory.h" 47 48 49#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 50 (1 << PROGRAM_ENV_PARAM) | \ 51 (1 << PROGRAM_STATE_VAR) | \ 52 (1 << PROGRAM_NAMED_PARAM) | \ 53 (1 << PROGRAM_CONSTANT) | \ 54 (1 << PROGRAM_UNIFORM)) 55 56 57struct label { 58 unsigned branch_target; 59 unsigned token; 60}; 61 62 63/** 64 * Intermediate state used during shader translation. 65 */ 66struct st_translate { 67 struct ureg_program *ureg; 68 69 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 70 struct ureg_src *constants; 71 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 72 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 73 struct ureg_dst address[1]; 74 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 75 76 /* Extra info for handling point size clamping in vertex shader */ 77 struct ureg_dst pointSizeResult; /**< Actual point size output register */ 78 struct ureg_src pointSizeConst; /**< Point size range constant register */ 79 GLint pointSizeOutIndex; /**< Temp point size output register */ 80 GLboolean prevInstWrotePointSize; 81 82 const GLuint *inputMapping; 83 const GLuint *outputMapping; 84 85 /* For every instruction that contains a label (eg CALL), keep 86 * details so that we can go back afterwards and emit the correct 87 * tgsi instruction number for each label. 88 */ 89 struct label *labels; 90 unsigned labels_size; 91 unsigned labels_count; 92 93 /* Keep a record of the tgsi instruction number that each mesa 94 * instruction starts at, will be used to fix up labels after 95 * translation. 96 */ 97 unsigned *insn; 98 unsigned insn_size; 99 unsigned insn_count; 100 101 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 102 103 boolean error; 104}; 105 106 107/** 108 * Make note of a branch to a label in the TGSI code. 109 * After we've emitted all instructions, we'll go over the list 110 * of labels built here and patch the TGSI code with the actual 111 * location of each label. 112 */ 113static unsigned *get_label( struct st_translate *t, 114 unsigned branch_target ) 115{ 116 unsigned i; 117 118 if (t->labels_count + 1 >= t->labels_size) { 119 unsigned old_size = t->labels_size; 120 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 121 t->labels = REALLOC( t->labels, 122 old_size * sizeof t->labels[0], 123 t->labels_size * sizeof t->labels[0] ); 124 if (t->labels == NULL) { 125 static unsigned dummy; 126 t->error = TRUE; 127 return &dummy; 128 } 129 } 130 131 i = t->labels_count++; 132 t->labels[i].branch_target = branch_target; 133 return &t->labels[i].token; 134} 135 136 137/** 138 * Called prior to emitting the TGSI code for each Mesa instruction. 139 * Allocate additional space for instructions if needed. 140 * Update the insn[] array so the next Mesa instruction points to 141 * the next TGSI instruction. 142 */ 143static void set_insn_start( struct st_translate *t, 144 unsigned start ) 145{ 146 if (t->insn_count + 1 >= t->insn_size) { 147 unsigned old_size = t->insn_size; 148 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 149 t->insn = REALLOC( t->insn, 150 old_size * sizeof t->insn[0], 151 t->insn_size * sizeof t->insn[0] ); 152 if (t->insn == NULL) { 153 t->error = TRUE; 154 return; 155 } 156 } 157 158 t->insn[t->insn_count++] = start; 159} 160 161 162/** 163 * Map a Mesa dst register to a TGSI ureg_dst register. 164 */ 165static struct ureg_dst 166dst_register( struct st_translate *t, 167 gl_register_file file, 168 GLuint index ) 169{ 170 switch( file ) { 171 case PROGRAM_UNDEFINED: 172 return ureg_dst_undef(); 173 174 case PROGRAM_TEMPORARY: 175 if (ureg_dst_is_undef(t->temps[index])) 176 t->temps[index] = ureg_DECL_temporary( t->ureg ); 177 178 return t->temps[index]; 179 180 case PROGRAM_OUTPUT: 181 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 182 t->prevInstWrotePointSize = GL_TRUE; 183 184 if (t->procType == TGSI_PROCESSOR_VERTEX) 185 assert(index < VERT_RESULT_MAX); 186 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 187 assert(index < FRAG_RESULT_MAX); 188 else 189 assert(index < GEOM_RESULT_MAX); 190 191 assert(t->outputMapping[index] < Elements(t->outputs)); 192 193 return t->outputs[t->outputMapping[index]]; 194 195 case PROGRAM_ADDRESS: 196 return t->address[index]; 197 198 default: 199 debug_assert( 0 ); 200 return ureg_dst_undef(); 201 } 202} 203 204 205/** 206 * Map a Mesa src register to a TGSI ureg_src register. 207 */ 208static struct ureg_src 209src_register( struct st_translate *t, 210 gl_register_file file, 211 GLint index ) 212{ 213 switch( file ) { 214 case PROGRAM_UNDEFINED: 215 return ureg_src_undef(); 216 217 case PROGRAM_TEMPORARY: 218 assert(index >= 0); 219 if (ureg_dst_is_undef(t->temps[index])) 220 t->temps[index] = ureg_DECL_temporary( t->ureg ); 221 assert(index < Elements(t->temps)); 222 return ureg_src(t->temps[index]); 223 224 case PROGRAM_NAMED_PARAM: 225 case PROGRAM_ENV_PARAM: 226 case PROGRAM_LOCAL_PARAM: 227 case PROGRAM_UNIFORM: 228 assert(index >= 0); 229 return t->constants[index]; 230 case PROGRAM_STATE_VAR: 231 case PROGRAM_CONSTANT: /* ie, immediate */ 232 if (index < 0) 233 return ureg_DECL_constant( t->ureg, 0 ); 234 else 235 return t->constants[index]; 236 237 case PROGRAM_INPUT: 238 assert(t->inputMapping[index] < Elements(t->inputs)); 239 return t->inputs[t->inputMapping[index]]; 240 241 case PROGRAM_OUTPUT: 242 assert(t->outputMapping[index] < Elements(t->outputs)); 243 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 244 245 case PROGRAM_ADDRESS: 246 return ureg_src(t->address[index]); 247 248 default: 249 debug_assert( 0 ); 250 return ureg_src_undef(); 251 } 252} 253 254 255/** 256 * Map mesa texture target to TGSI texture target. 257 */ 258static unsigned 259translate_texture_target( GLuint textarget, 260 GLboolean shadow ) 261{ 262 if (shadow) { 263 switch( textarget ) { 264 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; 265 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; 266 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; 267 default: break; 268 } 269 } 270 271 switch( textarget ) { 272 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; 273 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; 274 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; 275 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; 276 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; 277 default: 278 debug_assert( 0 ); 279 return TGSI_TEXTURE_1D; 280 } 281} 282 283 284/** 285 * Create a TGSI ureg_dst register from a Mesa dest register. 286 */ 287static struct ureg_dst 288translate_dst( struct st_translate *t, 289 const struct prog_dst_register *DstReg, 290 boolean saturate ) 291{ 292 struct ureg_dst dst = dst_register( t, 293 DstReg->File, 294 DstReg->Index ); 295 296 dst = ureg_writemask( dst, 297 DstReg->WriteMask ); 298 299 if (saturate) 300 dst = ureg_saturate( dst ); 301 302 if (DstReg->RelAddr) 303 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 304 305 return dst; 306} 307 308 309/** 310 * Create a TGSI ureg_src register from a Mesa src register. 311 */ 312static struct ureg_src 313translate_src( struct st_translate *t, 314 const struct prog_src_register *SrcReg ) 315{ 316 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 317 318 if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) { 319 src = src_register( t, SrcReg->File, SrcReg->Index2 ); 320 if (SrcReg->RelAddr2) 321 src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]), 322 SrcReg->Index); 323 else 324 src = ureg_src_dimension( src, SrcReg->Index); 325 } 326 327 src = ureg_swizzle( src, 328 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 329 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 330 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 331 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 332 333 if (SrcReg->Negate == NEGATE_XYZW) 334 src = ureg_negate(src); 335 336 if (SrcReg->Abs) 337 src = ureg_abs(src); 338 339 if (SrcReg->RelAddr) { 340 src = ureg_src_indirect( src, ureg_src(t->address[0])); 341 if (SrcReg->File != PROGRAM_INPUT && 342 SrcReg->File != PROGRAM_OUTPUT) { 343 /* If SrcReg->Index was negative, it was set to zero in 344 * src_register(). Reassign it now. But don't do this 345 * for input/output regs since they get remapped while 346 * const buffers don't. 347 */ 348 src.Index = SrcReg->Index; 349 } 350 } 351 352 return src; 353} 354 355 356static struct ureg_src swizzle_4v( struct ureg_src src, 357 const unsigned *swz ) 358{ 359 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 360} 361 362 363/** 364 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: 365 * 366 * SWZ dst, src.x-y10 367 * 368 * becomes: 369 * 370 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 371 */ 372static void emit_swz( struct st_translate *t, 373 struct ureg_dst dst, 374 const struct prog_src_register *SrcReg ) 375{ 376 struct ureg_program *ureg = t->ureg; 377 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 378 379 unsigned negate_mask = SrcReg->Negate; 380 381 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 382 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 383 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 384 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 385 386 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 387 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 388 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 389 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 390 391 unsigned negative_one_mask = one_mask & negate_mask; 392 unsigned positive_one_mask = one_mask & ~negate_mask; 393 394 struct ureg_src imm; 395 unsigned i; 396 unsigned mul_swizzle[4] = {0,0,0,0}; 397 unsigned add_swizzle[4] = {0,0,0,0}; 398 unsigned src_swizzle[4] = {0,0,0,0}; 399 boolean need_add = FALSE; 400 boolean need_mul = FALSE; 401 402 if (dst.WriteMask == 0) 403 return; 404 405 /* Is this just a MOV? 406 */ 407 if (zero_mask == 0 && 408 one_mask == 0 && 409 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 410 { 411 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 412 return; 413 } 414 415#define IMM_ZERO 0 416#define IMM_ONE 1 417#define IMM_NEG_ONE 2 418 419 imm = ureg_imm3f( ureg, 0, 1, -1 ); 420 421 for (i = 0; i < 4; i++) { 422 unsigned bit = 1 << i; 423 424 if (dst.WriteMask & bit) { 425 if (positive_one_mask & bit) { 426 mul_swizzle[i] = IMM_ZERO; 427 add_swizzle[i] = IMM_ONE; 428 need_add = TRUE; 429 } 430 else if (negative_one_mask & bit) { 431 mul_swizzle[i] = IMM_ZERO; 432 add_swizzle[i] = IMM_NEG_ONE; 433 need_add = TRUE; 434 } 435 else if (zero_mask & bit) { 436 mul_swizzle[i] = IMM_ZERO; 437 add_swizzle[i] = IMM_ZERO; 438 need_add = TRUE; 439 } 440 else { 441 add_swizzle[i] = IMM_ZERO; 442 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 443 need_mul = TRUE; 444 if (negate_mask & bit) { 445 mul_swizzle[i] = IMM_NEG_ONE; 446 } 447 else { 448 mul_swizzle[i] = IMM_ONE; 449 } 450 } 451 } 452 } 453 454 if (need_mul && need_add) { 455 ureg_MAD( ureg, 456 dst, 457 swizzle_4v( src, src_swizzle ), 458 swizzle_4v( imm, mul_swizzle ), 459 swizzle_4v( imm, add_swizzle ) ); 460 } 461 else if (need_mul) { 462 ureg_MUL( ureg, 463 dst, 464 swizzle_4v( src, src_swizzle ), 465 swizzle_4v( imm, mul_swizzle ) ); 466 } 467 else if (need_add) { 468 ureg_MOV( ureg, 469 dst, 470 swizzle_4v( imm, add_swizzle ) ); 471 } 472 else { 473 debug_assert(0); 474 } 475 476#undef IMM_ZERO 477#undef IMM_ONE 478#undef IMM_NEG_ONE 479} 480 481 482/** 483 * Negate the value of DDY to match GL semantics where (0,0) is the 484 * lower-left corner of the window. 485 * Note that the GL_ARB_fragment_coord_conventions extension will 486 * effect this someday. 487 */ 488static void emit_ddy( struct st_translate *t, 489 struct ureg_dst dst, 490 const struct prog_src_register *SrcReg ) 491{ 492 struct ureg_program *ureg = t->ureg; 493 struct ureg_src src = translate_src( t, SrcReg ); 494 src = ureg_negate( src ); 495 ureg_DDY( ureg, dst, src ); 496} 497 498 499 500static unsigned 501translate_opcode( unsigned op ) 502{ 503 switch( op ) { 504 case OPCODE_ARL: 505 return TGSI_OPCODE_ARL; 506 case OPCODE_ABS: 507 return TGSI_OPCODE_ABS; 508 case OPCODE_ADD: 509 return TGSI_OPCODE_ADD; 510 case OPCODE_BGNLOOP: 511 return TGSI_OPCODE_BGNLOOP; 512 case OPCODE_BGNSUB: 513 return TGSI_OPCODE_BGNSUB; 514 case OPCODE_BRA: 515 return TGSI_OPCODE_BRA; 516 case OPCODE_BRK: 517 return TGSI_OPCODE_BRK; 518 case OPCODE_CAL: 519 return TGSI_OPCODE_CAL; 520 case OPCODE_CMP: 521 return TGSI_OPCODE_CMP; 522 case OPCODE_CONT: 523 return TGSI_OPCODE_CONT; 524 case OPCODE_COS: 525 return TGSI_OPCODE_COS; 526 case OPCODE_DDX: 527 return TGSI_OPCODE_DDX; 528 case OPCODE_DDY: 529 return TGSI_OPCODE_DDY; 530 case OPCODE_DP2: 531 return TGSI_OPCODE_DP2; 532 case OPCODE_DP2A: 533 return TGSI_OPCODE_DP2A; 534 case OPCODE_DP3: 535 return TGSI_OPCODE_DP3; 536 case OPCODE_DP4: 537 return TGSI_OPCODE_DP4; 538 case OPCODE_DPH: 539 return TGSI_OPCODE_DPH; 540 case OPCODE_DST: 541 return TGSI_OPCODE_DST; 542 case OPCODE_ELSE: 543 return TGSI_OPCODE_ELSE; 544 case OPCODE_EMIT_VERTEX: 545 return TGSI_OPCODE_EMIT; 546 case OPCODE_END_PRIMITIVE: 547 return TGSI_OPCODE_ENDPRIM; 548 case OPCODE_ENDIF: 549 return TGSI_OPCODE_ENDIF; 550 case OPCODE_ENDLOOP: 551 return TGSI_OPCODE_ENDLOOP; 552 case OPCODE_ENDSUB: 553 return TGSI_OPCODE_ENDSUB; 554 case OPCODE_EX2: 555 return TGSI_OPCODE_EX2; 556 case OPCODE_EXP: 557 return TGSI_OPCODE_EXP; 558 case OPCODE_FLR: 559 return TGSI_OPCODE_FLR; 560 case OPCODE_FRC: 561 return TGSI_OPCODE_FRC; 562 case OPCODE_IF: 563 return TGSI_OPCODE_IF; 564 case OPCODE_TRUNC: 565 return TGSI_OPCODE_TRUNC; 566 case OPCODE_KIL: 567 return TGSI_OPCODE_KIL; 568 case OPCODE_KIL_NV: 569 return TGSI_OPCODE_KILP; 570 case OPCODE_LG2: 571 return TGSI_OPCODE_LG2; 572 case OPCODE_LOG: 573 return TGSI_OPCODE_LOG; 574 case OPCODE_LIT: 575 return TGSI_OPCODE_LIT; 576 case OPCODE_LRP: 577 return TGSI_OPCODE_LRP; 578 case OPCODE_MAD: 579 return TGSI_OPCODE_MAD; 580 case OPCODE_MAX: 581 return TGSI_OPCODE_MAX; 582 case OPCODE_MIN: 583 return TGSI_OPCODE_MIN; 584 case OPCODE_MOV: 585 return TGSI_OPCODE_MOV; 586 case OPCODE_MUL: 587 return TGSI_OPCODE_MUL; 588 case OPCODE_NOP: 589 return TGSI_OPCODE_NOP; 590 case OPCODE_NRM3: 591 return TGSI_OPCODE_NRM; 592 case OPCODE_NRM4: 593 return TGSI_OPCODE_NRM4; 594 case OPCODE_POW: 595 return TGSI_OPCODE_POW; 596 case OPCODE_RCP: 597 return TGSI_OPCODE_RCP; 598 case OPCODE_RET: 599 return TGSI_OPCODE_RET; 600 case OPCODE_RSQ: 601 return TGSI_OPCODE_RSQ; 602 case OPCODE_SCS: 603 return TGSI_OPCODE_SCS; 604 case OPCODE_SEQ: 605 return TGSI_OPCODE_SEQ; 606 case OPCODE_SGE: 607 return TGSI_OPCODE_SGE; 608 case OPCODE_SGT: 609 return TGSI_OPCODE_SGT; 610 case OPCODE_SIN: 611 return TGSI_OPCODE_SIN; 612 case OPCODE_SLE: 613 return TGSI_OPCODE_SLE; 614 case OPCODE_SLT: 615 return TGSI_OPCODE_SLT; 616 case OPCODE_SNE: 617 return TGSI_OPCODE_SNE; 618 case OPCODE_SSG: 619 return TGSI_OPCODE_SSG; 620 case OPCODE_SUB: 621 return TGSI_OPCODE_SUB; 622 case OPCODE_TEX: 623 return TGSI_OPCODE_TEX; 624 case OPCODE_TXB: 625 return TGSI_OPCODE_TXB; 626 case OPCODE_TXD: 627 return TGSI_OPCODE_TXD; 628 case OPCODE_TXL: 629 return TGSI_OPCODE_TXL; 630 case OPCODE_TXP: 631 return TGSI_OPCODE_TXP; 632 case OPCODE_XPD: 633 return TGSI_OPCODE_XPD; 634 case OPCODE_END: 635 return TGSI_OPCODE_END; 636 default: 637 debug_assert( 0 ); 638 return TGSI_OPCODE_NOP; 639 } 640} 641 642 643static void 644compile_instruction( 645 struct st_translate *t, 646 const struct prog_instruction *inst ) 647{ 648 struct ureg_program *ureg = t->ureg; 649 GLuint i; 650 struct ureg_dst dst[1]; 651 struct ureg_src src[4]; 652 unsigned num_dst; 653 unsigned num_src; 654 655 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 656 num_src = _mesa_num_inst_src_regs( inst->Opcode ); 657 658 if (num_dst) 659 dst[0] = translate_dst( t, 660 &inst->DstReg, 661 inst->SaturateMode ); 662 663 for (i = 0; i < num_src; i++) 664 src[i] = translate_src( t, &inst->SrcReg[i] ); 665 666 switch( inst->Opcode ) { 667 case OPCODE_SWZ: 668 emit_swz( t, dst[0], &inst->SrcReg[0] ); 669 return; 670 671 case OPCODE_BGNLOOP: 672 case OPCODE_CAL: 673 case OPCODE_ELSE: 674 case OPCODE_ENDLOOP: 675 case OPCODE_IF: 676 debug_assert(num_dst == 0); 677 ureg_label_insn( ureg, 678 translate_opcode( inst->Opcode ), 679 src, num_src, 680 get_label( t, inst->BranchTarget )); 681 return; 682 683 case OPCODE_TEX: 684 case OPCODE_TXB: 685 case OPCODE_TXD: 686 case OPCODE_TXL: 687 case OPCODE_TXP: 688 src[num_src++] = t->samplers[inst->TexSrcUnit]; 689 ureg_tex_insn( ureg, 690 translate_opcode( inst->Opcode ), 691 dst, num_dst, 692 translate_texture_target( inst->TexSrcTarget, 693 inst->TexShadow ), 694 src, num_src ); 695 return; 696 697 case OPCODE_SCS: 698 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 699 ureg_insn( ureg, 700 translate_opcode( inst->Opcode ), 701 dst, num_dst, 702 src, num_src ); 703 break; 704 705 case OPCODE_XPD: 706 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 707 ureg_insn( ureg, 708 translate_opcode( inst->Opcode ), 709 dst, num_dst, 710 src, num_src ); 711 break; 712 713 case OPCODE_NOISE1: 714 case OPCODE_NOISE2: 715 case OPCODE_NOISE3: 716 case OPCODE_NOISE4: 717 /* At some point, a motivated person could add a better 718 * implementation of noise. Currently not even the nvidia 719 * binary drivers do anything more than this. In any case, the 720 * place to do this is in the GL state tracker, not the poor 721 * driver. 722 */ 723 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 724 break; 725 726 case OPCODE_DDY: 727 emit_ddy( t, dst[0], &inst->SrcReg[0] ); 728 break; 729 730 default: 731 ureg_insn( ureg, 732 translate_opcode( inst->Opcode ), 733 dst, num_dst, 734 src, num_src ); 735 break; 736 } 737} 738 739 740/** 741 * Emit the TGSI instructions to adjust the WPOS pixel center convention 742 */ 743static void 744emit_adjusted_wpos( struct st_translate *t, 745 const struct gl_program *program, GLfloat value) 746{ 747 struct ureg_program *ureg = t->ureg; 748 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 749 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 750 751 /* Note that we bias X and Y and pass Z and W through unchanged. 752 * The shader might also use gl_FragCoord.w and .z. 753 */ 754 ureg_ADD(ureg, wpos_temp, wpos_input, 755 ureg_imm4f(ureg, value, value, 0.0f, 0.0f)); 756 757 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 758} 759 760 761/** 762 * Emit the TGSI instructions for inverting the WPOS y coordinate. 763 * This code is unavoidable because it also depends on whether 764 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 765 */ 766static void 767emit_wpos_inversion( struct st_translate *t, 768 const struct gl_program *program, 769 boolean invert) 770{ 771 struct ureg_program *ureg = t->ureg; 772 773 /* Fragment program uses fragment position input. 774 * Need to replace instances of INPUT[WPOS] with temp T 775 * where T = INPUT[WPOS] by y is inverted. 776 */ 777 static const gl_state_index wposTransformState[STATE_LENGTH] 778 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 }; 779 780 /* XXX: note we are modifying the incoming shader here! Need to 781 * do this before emitting the constant decls below, or this 782 * will be missed: 783 */ 784 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 785 wposTransformState); 786 787 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 788 struct ureg_dst wpos_temp; 789 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 790 791 /* MOV wpos_temp, input[wpos] 792 */ 793 if (wpos_input.File == TGSI_FILE_TEMPORARY) 794 wpos_temp = ureg_dst(wpos_input); 795 else { 796 wpos_temp = ureg_DECL_temporary( ureg ); 797 ureg_MOV( ureg, wpos_temp, wpos_input ); 798 } 799 800 if (invert) { 801 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 802 */ 803 ureg_MAD( ureg, 804 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 805 wpos_input, 806 ureg_scalar(wpostrans, 0), 807 ureg_scalar(wpostrans, 1)); 808 } else { 809 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 810 */ 811 ureg_MAD( ureg, 812 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 813 wpos_input, 814 ureg_scalar(wpostrans, 2), 815 ureg_scalar(wpostrans, 3)); 816 } 817 818 /* Use wpos_temp as position input from here on: 819 */ 820 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 821} 822 823 824/** 825 * Emit fragment position/ooordinate code. 826 */ 827static void 828emit_wpos(struct st_context *st, 829 struct st_translate *t, 830 const struct gl_program *program, 831 struct ureg_program *ureg) 832{ 833 const struct gl_fragment_program *fp = 834 (const struct gl_fragment_program *) program; 835 struct pipe_screen *pscreen = st->pipe->screen; 836 boolean invert = FALSE; 837 838 if (fp->OriginUpperLeft) { 839 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 840 } 841 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 842 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 843 invert = TRUE; 844 } 845 else 846 assert(0); 847 } 848 else { 849 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 850 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 851 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 852 invert = TRUE; 853 else 854 assert(0); 855 } 856 857 if (fp->PixelCenterInteger) { 858 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 859 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 860 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 861 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f); 862 else 863 assert(0); 864 } 865 else { 866 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 867 } 868 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 869 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 870 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f); 871 } 872 else 873 assert(0); 874 } 875 876 /* we invert after adjustment so that we avoid the MOV to temporary, 877 * and reuse the adjustment ADD instead */ 878 emit_wpos_inversion(t, program, invert); 879} 880 881 882/** 883 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 884 * TGSI uses +1 for front, -1 for back. 885 * This function converts the TGSI value to the GL value. Simply clamping/ 886 * saturating the value to [0,1] does the job. 887 */ 888static void 889emit_face_var( struct st_translate *t, 890 const struct gl_program *program ) 891{ 892 struct ureg_program *ureg = t->ureg; 893 struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); 894 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 895 896 /* MOV_SAT face_temp, input[face] 897 */ 898 face_temp = ureg_saturate( face_temp ); 899 ureg_MOV( ureg, face_temp, face_input ); 900 901 /* Use face_temp as face input from here on: 902 */ 903 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 904} 905 906 907static void 908emit_edgeflags( struct st_translate *t, 909 const struct gl_program *program ) 910{ 911 struct ureg_program *ureg = t->ureg; 912 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 913 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 914 915 ureg_MOV( ureg, edge_dst, edge_src ); 916} 917 918 919/** 920 * Translate Mesa program to TGSI format. 921 * \param program the program to translate 922 * \param numInputs number of input registers used 923 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 924 * input indexes 925 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 926 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 927 * each input 928 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 929 * \param numOutputs number of output registers used 930 * \param outputMapping maps Mesa fragment program outputs to TGSI 931 * generic outputs 932 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 933 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 934 * each output 935 * 936 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 937 */ 938enum pipe_error 939st_translate_mesa_program( 940 struct gl_context *ctx, 941 uint procType, 942 struct ureg_program *ureg, 943 const struct gl_program *program, 944 GLuint numInputs, 945 const GLuint inputMapping[], 946 const ubyte inputSemanticName[], 947 const ubyte inputSemanticIndex[], 948 const GLuint interpMode[], 949 GLuint numOutputs, 950 const GLuint outputMapping[], 951 const ubyte outputSemanticName[], 952 const ubyte outputSemanticIndex[], 953 boolean passthrough_edgeflags ) 954{ 955 struct st_translate translate, *t; 956 unsigned i; 957 enum pipe_error ret = PIPE_OK; 958 959 assert(numInputs <= Elements(t->inputs)); 960 assert(numOutputs <= Elements(t->outputs)); 961 962 t = &translate; 963 memset(t, 0, sizeof *t); 964 965 t->procType = procType; 966 t->inputMapping = inputMapping; 967 t->outputMapping = outputMapping; 968 t->ureg = ureg; 969 t->pointSizeOutIndex = -1; 970 t->prevInstWrotePointSize = GL_FALSE; 971 972 /*_mesa_print_program(program);*/ 973 974 /* 975 * Declare input attributes. 976 */ 977 if (procType == TGSI_PROCESSOR_FRAGMENT) { 978 for (i = 0; i < numInputs; i++) { 979 if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) { 980 t->inputs[i] = ureg_DECL_fs_input_cyl(ureg, 981 inputSemanticName[i], 982 inputSemanticIndex[i], 983 interpMode[i], 984 TGSI_CYLINDRICAL_WRAP_X); 985 } 986 else { 987 t->inputs[i] = ureg_DECL_fs_input(ureg, 988 inputSemanticName[i], 989 inputSemanticIndex[i], 990 interpMode[i]); 991 } 992 } 993 994 if (program->InputsRead & FRAG_BIT_WPOS) { 995 /* Must do this after setting up t->inputs, and before 996 * emitting constant references, below: 997 */ 998 emit_wpos(st_context(ctx), t, program, ureg); 999 } 1000 1001 if (program->InputsRead & FRAG_BIT_FACE) { 1002 emit_face_var( t, program ); 1003 } 1004 1005 /* 1006 * Declare output attributes. 1007 */ 1008 for (i = 0; i < numOutputs; i++) { 1009 switch (outputSemanticName[i]) { 1010 case TGSI_SEMANTIC_POSITION: 1011 t->outputs[i] = ureg_DECL_output( ureg, 1012 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 1013 outputSemanticIndex[i] ); 1014 1015 t->outputs[i] = ureg_writemask( t->outputs[i], 1016 TGSI_WRITEMASK_Z ); 1017 break; 1018 case TGSI_SEMANTIC_STENCIL: 1019 t->outputs[i] = ureg_DECL_output( ureg, 1020 TGSI_SEMANTIC_STENCIL, /* Stencil */ 1021 outputSemanticIndex[i] ); 1022 t->outputs[i] = ureg_writemask( t->outputs[i], 1023 TGSI_WRITEMASK_Y ); 1024 break; 1025 case TGSI_SEMANTIC_COLOR: 1026 t->outputs[i] = ureg_DECL_output( ureg, 1027 TGSI_SEMANTIC_COLOR, 1028 outputSemanticIndex[i] ); 1029 break; 1030 default: 1031 debug_assert(0); 1032 return 0; 1033 } 1034 } 1035 } 1036 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 1037 for (i = 0; i < numInputs; i++) { 1038 t->inputs[i] = ureg_DECL_gs_input(ureg, 1039 i, 1040 inputSemanticName[i], 1041 inputSemanticIndex[i]); 1042 } 1043 1044 for (i = 0; i < numOutputs; i++) { 1045 t->outputs[i] = ureg_DECL_output( ureg, 1046 outputSemanticName[i], 1047 outputSemanticIndex[i] ); 1048 } 1049 } 1050 else { 1051 assert(procType == TGSI_PROCESSOR_VERTEX); 1052 1053 for (i = 0; i < numInputs; i++) { 1054 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 1055 } 1056 1057 for (i = 0; i < numOutputs; i++) { 1058 t->outputs[i] = ureg_DECL_output( ureg, 1059 outputSemanticName[i], 1060 outputSemanticIndex[i] ); 1061 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) { 1062 /* Writing to the point size result register requires special 1063 * handling to implement clamping. 1064 */ 1065 static const gl_state_index pointSizeClampState[STATE_LENGTH] 1066 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 }; 1067 /* XXX: note we are modifying the incoming shader here! Need to 1068 * do this before emitting the constant decls below, or this 1069 * will be missed: 1070 */ 1071 unsigned pointSizeClampConst = 1072 _mesa_add_state_reference(program->Parameters, 1073 pointSizeClampState); 1074 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 1075 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 1076 t->pointSizeResult = t->outputs[i]; 1077 t->pointSizeOutIndex = i; 1078 t->outputs[i] = psizregtemp; 1079 } 1080 } 1081 if (passthrough_edgeflags) 1082 emit_edgeflags( t, program ); 1083 } 1084 1085 /* Declare address register. 1086 */ 1087 if (program->NumAddressRegs > 0) { 1088 debug_assert( program->NumAddressRegs == 1 ); 1089 t->address[0] = ureg_DECL_address( ureg ); 1090 } 1091 1092 if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { 1093 /* If temps are accessed with indirect addressing, declare temporaries 1094 * in sequential order. Else, we declare them on demand elsewhere. 1095 */ 1096 for (i = 0; i < program->NumTemporaries; i++) { 1097 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 1098 t->temps[i] = ureg_DECL_temporary( t->ureg ); 1099 } 1100 } 1101 1102 /* Emit constants and immediates. Mesa uses a single index space 1103 * for these, so we put all the translated regs in t->constants. 1104 */ 1105 if (program->Parameters) { 1106 t->constants = CALLOC( program->Parameters->NumParameters, 1107 sizeof t->constants[0] ); 1108 if (t->constants == NULL) { 1109 ret = PIPE_ERROR_OUT_OF_MEMORY; 1110 goto out; 1111 } 1112 1113 for (i = 0; i < program->Parameters->NumParameters; i++) { 1114 switch (program->Parameters->Parameters[i].Type) { 1115 case PROGRAM_ENV_PARAM: 1116 case PROGRAM_LOCAL_PARAM: 1117 case PROGRAM_STATE_VAR: 1118 case PROGRAM_NAMED_PARAM: 1119 case PROGRAM_UNIFORM: 1120 t->constants[i] = ureg_DECL_constant( ureg, i ); 1121 break; 1122 1123 /* Emit immediates only when there's no indirect addressing of 1124 * the const buffer. 1125 * FIXME: Be smarter and recognize param arrays: 1126 * indirect addressing is only valid within the referenced 1127 * array. 1128 */ 1129 case PROGRAM_CONSTANT: 1130 if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST) 1131 t->constants[i] = ureg_DECL_constant( ureg, i ); 1132 else 1133 t->constants[i] = 1134 ureg_DECL_immediate( ureg, 1135 program->Parameters->ParameterValues[i], 1136 4 ); 1137 break; 1138 default: 1139 break; 1140 } 1141 } 1142 } 1143 1144 /* texture samplers */ 1145 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 1146 if (program->SamplersUsed & (1 << i)) { 1147 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 1148 } 1149 } 1150 1151 /* Emit each instruction in turn: 1152 */ 1153 for (i = 0; i < program->NumInstructions; i++) { 1154 set_insn_start( t, ureg_get_instruction_number( ureg )); 1155 compile_instruction( t, &program->Instructions[i] ); 1156 1157 if (t->prevInstWrotePointSize && program->Id) { 1158 /* The previous instruction wrote to the (fake) vertex point size 1159 * result register. Now we need to clamp that value to the min/max 1160 * point size range, putting the result into the real point size 1161 * register. 1162 * Note that we can't do this easily at the end of program due to 1163 * possible early return. 1164 */ 1165 set_insn_start( t, ureg_get_instruction_number( ureg )); 1166 ureg_MAX( t->ureg, 1167 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 1168 ureg_src(t->outputs[t->pointSizeOutIndex]), 1169 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 1170 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 1171 ureg_src(t->outputs[t->pointSizeOutIndex]), 1172 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 1173 } 1174 t->prevInstWrotePointSize = GL_FALSE; 1175 } 1176 1177 /* Fix up all emitted labels: 1178 */ 1179 for (i = 0; i < t->labels_count; i++) { 1180 ureg_fixup_label( ureg, 1181 t->labels[i].token, 1182 t->insn[t->labels[i].branch_target] ); 1183 } 1184 1185out: 1186 FREE(t->insn); 1187 FREE(t->labels); 1188 FREE(t->constants); 1189 1190 if (t->error) { 1191 debug_printf("%s: translate error flag set\n", __FUNCTION__); 1192 } 1193 1194 return ret; 1195} 1196 1197 1198/** 1199 * Tokens cannot be free with free otherwise the builtin gallium 1200 * malloc debugging will get confused. 1201 */ 1202void 1203st_free_tokens(const struct tgsi_token *tokens) 1204{ 1205 FREE((void *)tokens); 1206} 1207