st_mesa_to_tgsi.c revision f3b215cba2bca92d6582cc0c34702b73289f909c
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * \author 30 * Michal Krol 31 */ 32 33#include "pipe/p_compiler.h" 34#include "pipe/p_shader_tokens.h" 35#include "tgsi/tgsi_parse.h" 36#include "tgsi/tgsi_build.h" 37#include "tgsi/tgsi_util.h" 38#include "tgsi/tgsi_dump.h" 39#include "tgsi/tgsi_sanity.h" 40#include "st_mesa_to_tgsi.h" 41#include "shader/prog_instruction.h" 42#include "shader/prog_parameter.h" 43#include "shader/prog_print.h" 44#include "util/u_debug.h" 45 46/* 47 * Map mesa register file to TGSI register file. 48 */ 49static GLuint 50map_register_file( 51 gl_register_file file, 52 GLuint index, 53 const GLuint immediateMapping[], 54 GLboolean indirectAccess ) 55{ 56 switch( file ) { 57 case PROGRAM_UNDEFINED: 58 return TGSI_FILE_NULL; 59 case PROGRAM_TEMPORARY: 60 return TGSI_FILE_TEMPORARY; 61 /*case PROGRAM_LOCAL_PARAM:*/ 62 /*case PROGRAM_ENV_PARAM:*/ 63 64 /* Because of the longstanding problem with mesa arb shaders 65 * where constants, immediates and state variables are all 66 * bundled together as PROGRAM_STATE_VAR, we can't tell from the 67 * mesa register file whether this is a CONSTANT or an 68 * IMMEDIATE, hence we need all the other information. 69 */ 70 case PROGRAM_STATE_VAR: 71 case PROGRAM_NAMED_PARAM: 72 case PROGRAM_UNIFORM: 73 if (!indirectAccess && immediateMapping && immediateMapping[index] != ~0) 74 return TGSI_FILE_IMMEDIATE; 75 else 76 return TGSI_FILE_CONSTANT; 77 case PROGRAM_CONSTANT: 78 if (indirectAccess) 79 return TGSI_FILE_CONSTANT; 80 assert(immediateMapping[index] != ~0); 81 return TGSI_FILE_IMMEDIATE; 82 case PROGRAM_INPUT: 83 return TGSI_FILE_INPUT; 84 case PROGRAM_OUTPUT: 85 return TGSI_FILE_OUTPUT; 86 case PROGRAM_ADDRESS: 87 return TGSI_FILE_ADDRESS; 88 default: 89 assert( 0 ); 90 return TGSI_FILE_NULL; 91 } 92} 93 94/** 95 * Map mesa register file index to TGSI index. 96 * Take special care when processing input and output indices. 97 * \param file one of TGSI_FILE_x 98 * \param index the mesa register file index 99 * \param inputMapping maps Mesa input indexes to TGSI input indexes 100 * \param outputMapping maps Mesa output indexes to TGSI output indexes 101 */ 102static GLuint 103map_register_file_index( 104 GLuint procType, 105 GLuint file, 106 GLuint index, 107 GLuint *swizzle, 108 const GLuint inputMapping[], 109 const GLuint outputMapping[], 110 const GLuint immediateMapping[], 111 GLboolean indirectAccess ) 112{ 113 switch( file ) { 114 case TGSI_FILE_INPUT: 115 /* inputs are mapped according to the user-defined map */ 116 return inputMapping[index]; 117 118 case TGSI_FILE_OUTPUT: 119 return outputMapping[index]; 120 121 case TGSI_FILE_IMMEDIATE: 122 if (indirectAccess) 123 return index; 124 assert(immediateMapping[index] != ~0); 125 return immediateMapping[index]; 126 127 default: 128 return index; 129 } 130} 131 132/* 133 * Map mesa texture target to TGSI texture target. 134 */ 135static GLuint 136map_texture_target( 137 GLuint textarget, 138 GLboolean shadow ) 139{ 140 switch( textarget ) { 141 case TEXTURE_1D_INDEX: 142 if (shadow) 143 return TGSI_TEXTURE_SHADOW1D; 144 else 145 return TGSI_TEXTURE_1D; 146 case TEXTURE_2D_INDEX: 147 if (shadow) 148 return TGSI_TEXTURE_SHADOW2D; 149 else 150 return TGSI_TEXTURE_2D; 151 case TEXTURE_3D_INDEX: 152 return TGSI_TEXTURE_3D; 153 case TEXTURE_CUBE_INDEX: 154 return TGSI_TEXTURE_CUBE; 155 case TEXTURE_RECT_INDEX: 156 if (shadow) 157 return TGSI_TEXTURE_SHADOWRECT; 158 else 159 return TGSI_TEXTURE_RECT; 160 default: 161 assert( 0 ); 162 } 163 164 return TGSI_TEXTURE_1D; 165} 166 167static GLuint 168convert_sat( 169 GLuint sat ) 170{ 171 switch( sat ) { 172 case SATURATE_OFF: 173 return TGSI_SAT_NONE; 174 case SATURATE_ZERO_ONE: 175 return TGSI_SAT_ZERO_ONE; 176 case SATURATE_PLUS_MINUS_ONE: 177 return TGSI_SAT_MINUS_PLUS_ONE; 178 default: 179 assert( 0 ); 180 return TGSI_SAT_NONE; 181 } 182} 183 184static GLuint 185convert_writemask( 186 GLuint writemask ) 187{ 188 assert( WRITEMASK_X == TGSI_WRITEMASK_X ); 189 assert( WRITEMASK_Y == TGSI_WRITEMASK_Y ); 190 assert( WRITEMASK_Z == TGSI_WRITEMASK_Z ); 191 assert( WRITEMASK_W == TGSI_WRITEMASK_W ); 192 assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 ); 193 194 return writemask; 195} 196 197static struct tgsi_full_immediate 198make_immediate(const float *value, uint size) 199{ 200 struct tgsi_full_immediate imm; 201 unsigned i; 202 203 imm = tgsi_default_full_immediate(); 204 imm.Immediate.NrTokens += size; 205 imm.Immediate.DataType = TGSI_IMM_FLOAT32; 206 207 for (i = 0; i < size; i++) 208 imm.u[i].Float = value[i]; 209 210 return imm; 211} 212 213static void 214compile_instruction( 215 const struct prog_instruction *inst, 216 struct tgsi_full_instruction *fullinst, 217 const GLuint inputMapping[], 218 const GLuint outputMapping[], 219 const GLuint immediateMapping[], 220 GLboolean indirectAccess, 221 GLuint preamble_size, 222 GLuint procType, 223 GLboolean *insideSubroutine, 224 GLint wposTemp) 225{ 226 GLuint i; 227 struct tgsi_full_dst_register *fulldst; 228 struct tgsi_full_src_register *fullsrc; 229 230 *fullinst = tgsi_default_full_instruction(); 231 232 fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode ); 233 fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode ); 234 fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode ); 235 236 fulldst = &fullinst->FullDstRegisters[0]; 237 fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE ); 238 fulldst->DstRegister.Index = map_register_file_index( 239 procType, 240 fulldst->DstRegister.File, 241 inst->DstReg.Index, 242 NULL, 243 inputMapping, 244 outputMapping, 245 NULL, 246 GL_FALSE ); 247 fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask ); 248 if (inst->DstReg.RelAddr) { 249 fulldst->DstRegister.Indirect = 1; 250 fulldst->DstRegisterInd.File = TGSI_FILE_ADDRESS; 251 fulldst->DstRegisterInd.Index = 0; 252 } 253 254 for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { 255 GLuint j; 256 GLuint swizzle = inst->SrcReg[i].Swizzle; 257 258 fullsrc = &fullinst->FullSrcRegisters[i]; 259 260 if (procType == TGSI_PROCESSOR_FRAGMENT && 261 inst->SrcReg[i].File == PROGRAM_INPUT && 262 inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { 263 /* special case of INPUT[WPOS] */ 264 fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY; 265 fullsrc->SrcRegister.Index = wposTemp; 266 } 267 else { 268 /* any other src register */ 269 fullsrc->SrcRegister.File = map_register_file( 270 inst->SrcReg[i].File, 271 inst->SrcReg[i].Index, 272 immediateMapping, 273 indirectAccess ); 274 fullsrc->SrcRegister.Index = map_register_file_index( 275 procType, 276 fullsrc->SrcRegister.File, 277 inst->SrcReg[i].Index, 278 &swizzle, 279 inputMapping, 280 outputMapping, 281 immediateMapping, 282 indirectAccess ); 283 } 284 285 /* swizzle (ext swizzle also depends on negation) */ 286 { 287 GLuint swz[4]; 288 GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE && 289 inst->SrcReg[i].Negate != NEGATE_XYZW); 290 for( j = 0; j < 4; j++ ) { 291 swz[j] = GET_SWZ( swizzle, j ); 292 if (swz[j] > SWIZZLE_W) 293 extended = GL_TRUE; 294 } 295 if (extended) { 296 for (j = 0; j < 4; j++) { 297 tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz, 298 swz[j], j); 299 } 300 } 301 else { 302 for (j = 0; j < 4; j++) { 303 tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister, 304 swz[j], j); 305 } 306 } 307 } 308 309 if( inst->SrcReg[i].Negate == NEGATE_XYZW ) { 310 fullsrc->SrcRegister.Negate = 1; 311 } 312 else if( inst->SrcReg[i].Negate != NEGATE_NONE ) { 313 if( inst->SrcReg[i].Negate & NEGATE_X ) { 314 fullsrc->SrcRegisterExtSwz.NegateX = 1; 315 } 316 if( inst->SrcReg[i].Negate & NEGATE_Y ) { 317 fullsrc->SrcRegisterExtSwz.NegateY = 1; 318 } 319 if( inst->SrcReg[i].Negate & NEGATE_Z ) { 320 fullsrc->SrcRegisterExtSwz.NegateZ = 1; 321 } 322 if( inst->SrcReg[i].Negate & NEGATE_W ) { 323 fullsrc->SrcRegisterExtSwz.NegateW = 1; 324 } 325 } 326 327 if( inst->SrcReg[i].Abs ) { 328 fullsrc->SrcRegisterExtMod.Absolute = 1; 329 } 330 331 if( inst->SrcReg[i].RelAddr ) { 332 fullsrc->SrcRegister.Indirect = 1; 333 334 fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS; 335 fullsrc->SrcRegisterInd.Index = 0; 336 } 337 } 338 339 switch( inst->Opcode ) { 340 case OPCODE_ARL: 341 fullinst->Instruction.Opcode = TGSI_OPCODE_ARL; 342 break; 343 case OPCODE_ABS: 344 fullinst->Instruction.Opcode = TGSI_OPCODE_ABS; 345 break; 346 case OPCODE_ADD: 347 fullinst->Instruction.Opcode = TGSI_OPCODE_ADD; 348 break; 349 case OPCODE_BGNLOOP: 350 fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP; 351 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; 352 break; 353 case OPCODE_BGNSUB: 354 fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB; 355 *insideSubroutine = GL_TRUE; 356 break; 357 case OPCODE_BRA: 358 fullinst->Instruction.Opcode = TGSI_OPCODE_BRA; 359 break; 360 case OPCODE_BRK: 361 fullinst->Instruction.Opcode = TGSI_OPCODE_BRK; 362 break; 363 case OPCODE_CAL: 364 fullinst->Instruction.Opcode = TGSI_OPCODE_CAL; 365 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; 366 break; 367 case OPCODE_CMP: 368 fullinst->Instruction.Opcode = TGSI_OPCODE_CMP; 369 break; 370 case OPCODE_CONT: 371 fullinst->Instruction.Opcode = TGSI_OPCODE_CONT; 372 break; 373 case OPCODE_COS: 374 fullinst->Instruction.Opcode = TGSI_OPCODE_COS; 375 break; 376 case OPCODE_DDX: 377 fullinst->Instruction.Opcode = TGSI_OPCODE_DDX; 378 break; 379 case OPCODE_DDY: 380 fullinst->Instruction.Opcode = TGSI_OPCODE_DDY; 381 break; 382 case OPCODE_DP2: 383 fullinst->Instruction.Opcode = TGSI_OPCODE_DP2; 384 break; 385 case OPCODE_DP2A: 386 fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A; 387 break; 388 case OPCODE_DP3: 389 fullinst->Instruction.Opcode = TGSI_OPCODE_DP3; 390 break; 391 case OPCODE_DP4: 392 fullinst->Instruction.Opcode = TGSI_OPCODE_DP4; 393 break; 394 case OPCODE_DPH: 395 fullinst->Instruction.Opcode = TGSI_OPCODE_DPH; 396 break; 397 case OPCODE_DST: 398 fullinst->Instruction.Opcode = TGSI_OPCODE_DST; 399 break; 400 case OPCODE_ELSE: 401 fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE; 402 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; 403 break; 404 case OPCODE_ENDIF: 405 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF; 406 break; 407 case OPCODE_ENDLOOP: 408 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP; 409 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; 410 break; 411 case OPCODE_ENDSUB: 412 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB; 413 *insideSubroutine = GL_FALSE; 414 break; 415 case OPCODE_EX2: 416 fullinst->Instruction.Opcode = TGSI_OPCODE_EX2; 417 break; 418 case OPCODE_EXP: 419 fullinst->Instruction.Opcode = TGSI_OPCODE_EXP; 420 break; 421 case OPCODE_FLR: 422 fullinst->Instruction.Opcode = TGSI_OPCODE_FLR; 423 break; 424 case OPCODE_FRC: 425 fullinst->Instruction.Opcode = TGSI_OPCODE_FRC; 426 break; 427 case OPCODE_IF: 428 fullinst->Instruction.Opcode = TGSI_OPCODE_IF; 429 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; 430 break; 431 case OPCODE_TRUNC: 432 fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC; 433 break; 434 case OPCODE_KIL: 435 /* conditional */ 436 fullinst->Instruction.Opcode = TGSI_OPCODE_KIL; 437 break; 438 case OPCODE_KIL_NV: 439 /* predicated */ 440 assert(inst->DstReg.CondMask == COND_TR); 441 fullinst->Instruction.Opcode = TGSI_OPCODE_KILP; 442 break; 443 case OPCODE_LG2: 444 fullinst->Instruction.Opcode = TGSI_OPCODE_LG2; 445 break; 446 case OPCODE_LOG: 447 fullinst->Instruction.Opcode = TGSI_OPCODE_LOG; 448 break; 449 case OPCODE_LIT: 450 fullinst->Instruction.Opcode = TGSI_OPCODE_LIT; 451 break; 452 case OPCODE_LRP: 453 fullinst->Instruction.Opcode = TGSI_OPCODE_LRP; 454 break; 455 case OPCODE_MAD: 456 fullinst->Instruction.Opcode = TGSI_OPCODE_MAD; 457 break; 458 case OPCODE_MAX: 459 fullinst->Instruction.Opcode = TGSI_OPCODE_MAX; 460 break; 461 case OPCODE_MIN: 462 fullinst->Instruction.Opcode = TGSI_OPCODE_MIN; 463 break; 464 case OPCODE_MOV: 465 fullinst->Instruction.Opcode = TGSI_OPCODE_MOV; 466 break; 467 case OPCODE_MUL: 468 fullinst->Instruction.Opcode = TGSI_OPCODE_MUL; 469 break; 470 case OPCODE_NOISE1: 471 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1; 472 break; 473 case OPCODE_NOISE2: 474 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2; 475 break; 476 case OPCODE_NOISE3: 477 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3; 478 break; 479 case OPCODE_NOISE4: 480 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4; 481 break; 482 case OPCODE_NOP: 483 fullinst->Instruction.Opcode = TGSI_OPCODE_NOP; 484 break; 485 case OPCODE_NRM3: 486 fullinst->Instruction.Opcode = TGSI_OPCODE_NRM; 487 break; 488 case OPCODE_NRM4: 489 fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4; 490 break; 491 case OPCODE_POW: 492 fullinst->Instruction.Opcode = TGSI_OPCODE_POW; 493 break; 494 case OPCODE_RCP: 495 fullinst->Instruction.Opcode = TGSI_OPCODE_RCP; 496 break; 497 case OPCODE_RET: 498 /* If RET is used inside main (not a real subroutine) we may want 499 * to execute END instead of RET. TBD... 500 */ 501 if (1 /* *insideSubroutine */) { 502 fullinst->Instruction.Opcode = TGSI_OPCODE_RET; 503 } 504 else { 505 /* inside main() pseudo-function */ 506 fullinst->Instruction.Opcode = TGSI_OPCODE_END; 507 } 508 break; 509 case OPCODE_RSQ: 510 fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ; 511 break; 512 case OPCODE_SCS: 513 fullinst->Instruction.Opcode = TGSI_OPCODE_SCS; 514 fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY; 515 break; 516 case OPCODE_SEQ: 517 fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ; 518 break; 519 case OPCODE_SGE: 520 fullinst->Instruction.Opcode = TGSI_OPCODE_SGE; 521 break; 522 case OPCODE_SGT: 523 fullinst->Instruction.Opcode = TGSI_OPCODE_SGT; 524 break; 525 case OPCODE_SIN: 526 fullinst->Instruction.Opcode = TGSI_OPCODE_SIN; 527 break; 528 case OPCODE_SLE: 529 fullinst->Instruction.Opcode = TGSI_OPCODE_SLE; 530 break; 531 case OPCODE_SLT: 532 fullinst->Instruction.Opcode = TGSI_OPCODE_SLT; 533 break; 534 case OPCODE_SNE: 535 fullinst->Instruction.Opcode = TGSI_OPCODE_SNE; 536 break; 537 case OPCODE_SSG: 538 fullinst->Instruction.Opcode = TGSI_OPCODE_SSG; 539 break; 540 case OPCODE_SUB: 541 fullinst->Instruction.Opcode = TGSI_OPCODE_SUB; 542 break; 543 case OPCODE_SWZ: 544 fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ; 545 break; 546 case OPCODE_TEX: 547 /* ordinary texture lookup */ 548 fullinst->Instruction.Opcode = TGSI_OPCODE_TEX; 549 fullinst->Instruction.NumSrcRegs = 2; 550 fullinst->InstructionExtTexture.Texture = 551 map_texture_target( inst->TexSrcTarget, inst->TexShadow ); 552 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; 553 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; 554 break; 555 case OPCODE_TXB: 556 /* texture lookup with LOD bias */ 557 fullinst->Instruction.Opcode = TGSI_OPCODE_TXB; 558 fullinst->Instruction.NumSrcRegs = 2; 559 fullinst->InstructionExtTexture.Texture = 560 map_texture_target( inst->TexSrcTarget, inst->TexShadow ); 561 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; 562 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; 563 break; 564 case OPCODE_TXD: 565 /* texture lookup with explicit partial derivatives */ 566 fullinst->Instruction.Opcode = TGSI_OPCODE_TXD; 567 fullinst->Instruction.NumSrcRegs = 4; 568 fullinst->InstructionExtTexture.Texture = 569 map_texture_target( inst->TexSrcTarget, inst->TexShadow ); 570 /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */ 571 fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER; 572 fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit; 573 break; 574 case OPCODE_TXL: 575 /* texture lookup with explicit LOD */ 576 fullinst->Instruction.Opcode = TGSI_OPCODE_TXL; 577 fullinst->Instruction.NumSrcRegs = 2; 578 fullinst->InstructionExtTexture.Texture = 579 map_texture_target( inst->TexSrcTarget, inst->TexShadow ); 580 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; 581 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; 582 break; 583 case OPCODE_TXP: 584 /* texture lookup with divide by Q component */ 585 /* convert to TEX w/ special flag for division */ 586 fullinst->Instruction.Opcode = TGSI_OPCODE_TXP; 587 fullinst->Instruction.NumSrcRegs = 2; 588 fullinst->InstructionExtTexture.Texture = 589 map_texture_target( inst->TexSrcTarget, inst->TexShadow ); 590 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; 591 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; 592 break; 593 case OPCODE_XPD: 594 fullinst->Instruction.Opcode = TGSI_OPCODE_XPD; 595 fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ; 596 break; 597 case OPCODE_END: 598 fullinst->Instruction.Opcode = TGSI_OPCODE_END; 599 break; 600 default: 601 assert( 0 ); 602 } 603} 604 605/** 606 * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens 607 */ 608static struct tgsi_full_declaration 609make_input_decl( 610 GLuint index, 611 GLboolean interpolate_info, 612 GLuint interpolate, 613 GLuint usage_mask, 614 GLboolean semantic_info, 615 GLuint semantic_name, 616 GLbitfield semantic_index, 617 GLbitfield input_flags) 618{ 619 struct tgsi_full_declaration decl; 620 621 assert(semantic_name < TGSI_SEMANTIC_COUNT); 622 623 decl = tgsi_default_full_declaration(); 624 decl.Declaration.File = TGSI_FILE_INPUT; 625 decl.Declaration.UsageMask = usage_mask; 626 decl.Declaration.Semantic = semantic_info; 627 decl.DeclarationRange.First = index; 628 decl.DeclarationRange.Last = index; 629 if (semantic_info) { 630 decl.Semantic.SemanticName = semantic_name; 631 decl.Semantic.SemanticIndex = semantic_index; 632 } 633 if (interpolate_info) { 634 decl.Declaration.Interpolate = interpolate; 635 } 636 if (input_flags & PROG_PARAM_BIT_CENTROID) 637 decl.Declaration.Centroid = 1; 638 if (input_flags & PROG_PARAM_BIT_INVARIANT) 639 decl.Declaration.Invariant = 1; 640 641 return decl; 642} 643 644/** 645 * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens 646 */ 647static struct tgsi_full_declaration 648make_output_decl( 649 GLuint index, 650 GLuint semantic_name, 651 GLuint semantic_index, 652 GLuint usage_mask, 653 GLbitfield output_flags) 654{ 655 struct tgsi_full_declaration decl; 656 657 assert(semantic_name < TGSI_SEMANTIC_COUNT); 658 659 decl = tgsi_default_full_declaration(); 660 decl.Declaration.File = TGSI_FILE_OUTPUT; 661 decl.Declaration.UsageMask = usage_mask; 662 decl.Declaration.Semantic = 1; 663 decl.DeclarationRange.First = index; 664 decl.DeclarationRange.Last = index; 665 decl.Semantic.SemanticName = semantic_name; 666 decl.Semantic.SemanticIndex = semantic_index; 667 if (output_flags & PROG_PARAM_BIT_CENTROID) 668 decl.Declaration.Centroid = 1; 669 if (output_flags & PROG_PARAM_BIT_INVARIANT) 670 decl.Declaration.Invariant = 1; 671 672 return decl; 673} 674 675 676static struct tgsi_full_declaration 677make_temp_decl( 678 GLuint start_index, 679 GLuint end_index ) 680{ 681 struct tgsi_full_declaration decl; 682 decl = tgsi_default_full_declaration(); 683 decl.Declaration.File = TGSI_FILE_TEMPORARY; 684 decl.DeclarationRange.First = start_index; 685 decl.DeclarationRange.Last = end_index; 686 return decl; 687} 688 689static struct tgsi_full_declaration 690make_addr_decl( 691 GLuint start_index, 692 GLuint end_index ) 693{ 694 struct tgsi_full_declaration decl; 695 696 decl = tgsi_default_full_declaration(); 697 decl.Declaration.File = TGSI_FILE_ADDRESS; 698 decl.DeclarationRange.First = start_index; 699 decl.DeclarationRange.Last = end_index; 700 return decl; 701} 702 703static struct tgsi_full_declaration 704make_sampler_decl(GLuint index) 705{ 706 struct tgsi_full_declaration decl; 707 decl = tgsi_default_full_declaration(); 708 decl.Declaration.File = TGSI_FILE_SAMPLER; 709 decl.DeclarationRange.First = index; 710 decl.DeclarationRange.Last = index; 711 return decl; 712} 713 714/** Reference into a constant buffer */ 715static struct tgsi_full_declaration 716make_constant_decl(GLuint first, GLuint last) 717{ 718 struct tgsi_full_declaration decl; 719 decl = tgsi_default_full_declaration(); 720 decl.Declaration.File = TGSI_FILE_CONSTANT; 721 decl.DeclarationRange.First = first; 722 decl.DeclarationRange.Last = last; 723 return decl; 724} 725 726 727 728/** 729 * Find the temporaries which are used in the given program. 730 */ 731static void 732find_temporaries(const struct gl_program *program, 733 GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) 734{ 735 GLuint i, j; 736 737 for (i = 0; i < MAX_PROGRAM_TEMPS; i++) 738 tempsUsed[i] = GL_FALSE; 739 740 for (i = 0; i < program->NumInstructions; i++) { 741 const struct prog_instruction *inst = program->Instructions + i; 742 const GLuint n = _mesa_num_inst_src_regs( inst->Opcode ); 743 for (j = 0; j < n; j++) { 744 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) 745 tempsUsed[inst->SrcReg[j].Index] = GL_TRUE; 746 if (inst->DstReg.File == PROGRAM_TEMPORARY) 747 tempsUsed[inst->DstReg.Index] = GL_TRUE; 748 } 749 } 750} 751 752 753/** 754 * Find an unused temporary in the tempsUsed array. 755 */ 756static int 757find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) 758{ 759 int i; 760 for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { 761 if (!tempsUsed[i]) { 762 tempsUsed[i] = GL_TRUE; 763 return i; 764 } 765 } 766 return -1; 767} 768 769 770/** helper for building simple TGSI instruction, one src register */ 771static void 772build_tgsi_instruction1(struct tgsi_full_instruction *inst, 773 int opcode, 774 int dstFile, int dstIndex, int writemask, 775 int srcFile1, int srcIndex1) 776{ 777 *inst = tgsi_default_full_instruction(); 778 779 inst->Instruction.Opcode = opcode; 780 781 inst->Instruction.NumDstRegs = 1; 782 inst->FullDstRegisters[0].DstRegister.File = dstFile; 783 inst->FullDstRegisters[0].DstRegister.Index = dstIndex; 784 inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; 785 786 inst->Instruction.NumSrcRegs = 1; 787 inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; 788 inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; 789} 790 791 792/** helper for building simple TGSI instruction, two src registers */ 793static void 794build_tgsi_instruction2(struct tgsi_full_instruction *inst, 795 int opcode, 796 int dstFile, int dstIndex, int writemask, 797 int srcFile1, int srcIndex1, 798 int srcFile2, int srcIndex2) 799{ 800 *inst = tgsi_default_full_instruction(); 801 802 inst->Instruction.Opcode = opcode; 803 804 inst->Instruction.NumDstRegs = 1; 805 inst->FullDstRegisters[0].DstRegister.File = dstFile; 806 inst->FullDstRegisters[0].DstRegister.Index = dstIndex; 807 inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; 808 809 inst->Instruction.NumSrcRegs = 2; 810 inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; 811 inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; 812 inst->FullSrcRegisters[1].SrcRegister.File = srcFile2; 813 inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2; 814} 815 816 817 818/** 819 * Emit the TGSI instructions for inverting the WPOS y coordinate. 820 */ 821static int 822emit_inverted_wpos(struct tgsi_token *tokens, 823 int wpos_temp, 824 int winsize_const, 825 int wpos_input, 826 struct tgsi_header *header, int maxTokens) 827{ 828 struct tgsi_full_instruction fullinst; 829 int ti = 0; 830 831 /* MOV wpos_temp.xzw, input[wpos]; */ 832 build_tgsi_instruction1(&fullinst, 833 TGSI_OPCODE_MOV, 834 TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW, 835 TGSI_FILE_INPUT, 0); 836 837 ti += tgsi_build_full_instruction(&fullinst, 838 &tokens[ti], 839 header, 840 maxTokens - ti); 841 842 /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */ 843 build_tgsi_instruction2(&fullinst, 844 TGSI_OPCODE_SUB, 845 TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y, 846 TGSI_FILE_CONSTANT, winsize_const, 847 TGSI_FILE_INPUT, wpos_input); 848 849 ti += tgsi_build_full_instruction(&fullinst, 850 &tokens[ti], 851 header, 852 maxTokens - ti); 853 854 return ti; 855} 856 857 858 859 860/** 861 * Translate Mesa program to TGSI format. 862 * \param program the program to translate 863 * \param numInputs number of input registers used 864 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 865 * input indexes 866 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 867 * \param inputSemanticIndex the semantic index (ex: which texcoord) for each input 868 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 869 870 * \param numOutputs number of output registers used 871 * \param outputMapping maps Mesa fragment program outputs to TGSI 872 * generic outputs 873 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 874 * \param outputSemanticIndex the semantic index (ex: which texcoord) for each output 875 * \param tokens array to store translated tokens in 876 * \param maxTokens size of the tokens array 877 * 878 * \return number of tokens placed in 'tokens' buffer, or zero if error 879 */ 880GLuint 881st_translate_mesa_program( 882 GLcontext *ctx, 883 uint procType, 884 const struct gl_program *program, 885 GLuint numInputs, 886 const GLuint inputMapping[], 887 const ubyte inputSemanticName[], 888 const ubyte inputSemanticIndex[], 889 const GLuint interpMode[], 890 const GLbitfield inputFlags[], 891 GLuint numOutputs, 892 const GLuint outputMapping[], 893 const ubyte outputSemanticName[], 894 const ubyte outputSemanticIndex[], 895 const GLbitfield outputFlags[], 896 struct tgsi_token *tokens, 897 GLuint maxTokens ) 898{ 899 GLuint i; 900 GLuint ti; /* token index */ 901 struct tgsi_header *header; 902 struct tgsi_processor *processor; 903 GLuint preamble_size = 0; 904 GLuint immediates[1000]; 905 GLuint numImmediates = 0; 906 GLboolean insideSubroutine = GL_FALSE; 907 GLboolean indirectAccess = GL_FALSE; 908 GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; 909 GLint wposTemp = -1, winHeightConst = -1; 910 911 assert(procType == TGSI_PROCESSOR_FRAGMENT || 912 procType == TGSI_PROCESSOR_VERTEX); 913 914 find_temporaries(program, tempsUsed); 915 916 if (procType == TGSI_PROCESSOR_FRAGMENT) { 917 if (program->InputsRead & FRAG_BIT_WPOS) { 918 /* Fragment program uses fragment position input. 919 * Need to replace instances of INPUT[WPOS] with temp T 920 * where T = INPUT[WPOS] by y is inverted. 921 */ 922 static const gl_state_index winSizeState[STATE_LENGTH] 923 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; 924 winHeightConst = _mesa_add_state_reference(program->Parameters, 925 winSizeState); 926 wposTemp = find_free_temporary(tempsUsed); 927 } 928 } 929 930 931 *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); 932 933 header = (struct tgsi_header *) &tokens[1]; 934 *header = tgsi_build_header(); 935 936 processor = (struct tgsi_processor *) &tokens[2]; 937 *processor = tgsi_build_processor( procType, header ); 938 939 ti = 3; 940 941 /* 942 * Declare input attributes. 943 */ 944 if (procType == TGSI_PROCESSOR_FRAGMENT) { 945 for (i = 0; i < numInputs; i++) { 946 struct tgsi_full_declaration fulldecl; 947 fulldecl = make_input_decl(i, 948 GL_TRUE, interpMode[i], 949 TGSI_WRITEMASK_XYZW, 950 GL_TRUE, inputSemanticName[i], 951 inputSemanticIndex[i], 952 inputFlags[i]); 953 ti += tgsi_build_full_declaration(&fulldecl, 954 &tokens[ti], 955 header, 956 maxTokens - ti ); 957 } 958 } 959 else { 960 /* vertex prog */ 961 /* XXX: this could probaby be merged with the clause above. 962 * the only difference is the semantic tags. 963 */ 964 for (i = 0; i < numInputs; i++) { 965 struct tgsi_full_declaration fulldecl; 966 fulldecl = make_input_decl(i, 967 GL_FALSE, 0, 968 TGSI_WRITEMASK_XYZW, 969 GL_FALSE, 0, 0, 970 inputFlags[i]); 971 ti += tgsi_build_full_declaration(&fulldecl, 972 &tokens[ti], 973 header, 974 maxTokens - ti ); 975 } 976 } 977 978 /* 979 * Declare output attributes. 980 */ 981 if (procType == TGSI_PROCESSOR_FRAGMENT) { 982 for (i = 0; i < numOutputs; i++) { 983 struct tgsi_full_declaration fulldecl; 984 switch (outputSemanticName[i]) { 985 case TGSI_SEMANTIC_POSITION: 986 fulldecl = make_output_decl(i, 987 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 988 outputSemanticIndex[i], 989 TGSI_WRITEMASK_Z, 990 outputFlags[i]); 991 break; 992 case TGSI_SEMANTIC_COLOR: 993 fulldecl = make_output_decl(i, 994 TGSI_SEMANTIC_COLOR, 995 outputSemanticIndex[i], 996 TGSI_WRITEMASK_XYZW, 997 outputFlags[i]); 998 break; 999 default: 1000 assert(0); 1001 return 0; 1002 } 1003 ti += tgsi_build_full_declaration(&fulldecl, 1004 &tokens[ti], 1005 header, 1006 maxTokens - ti ); 1007 } 1008 } 1009 else { 1010 /* vertex prog */ 1011 for (i = 0; i < numOutputs; i++) { 1012 struct tgsi_full_declaration fulldecl; 1013 fulldecl = make_output_decl(i, 1014 outputSemanticName[i], 1015 outputSemanticIndex[i], 1016 TGSI_WRITEMASK_XYZW, 1017 outputFlags[i]); 1018 ti += tgsi_build_full_declaration(&fulldecl, 1019 &tokens[ti], 1020 header, 1021 maxTokens - ti ); 1022 } 1023 } 1024 1025 /* temporary decls */ 1026 { 1027 GLboolean inside_range = GL_FALSE; 1028 GLuint start_range = 0; 1029 1030 tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE; 1031 for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) { 1032 if (tempsUsed[i] && !inside_range) { 1033 inside_range = GL_TRUE; 1034 start_range = i; 1035 } 1036 else if (!tempsUsed[i] && inside_range) { 1037 struct tgsi_full_declaration fulldecl; 1038 1039 inside_range = GL_FALSE; 1040 fulldecl = make_temp_decl( start_range, i - 1 ); 1041 ti += tgsi_build_full_declaration( 1042 &fulldecl, 1043 &tokens[ti], 1044 header, 1045 maxTokens - ti ); 1046 } 1047 } 1048 } 1049 1050 /* Declare address register. 1051 */ 1052 if (program->NumAddressRegs > 0) { 1053 struct tgsi_full_declaration fulldecl; 1054 1055 assert( program->NumAddressRegs == 1 ); 1056 1057 fulldecl = make_addr_decl( 0, 0 ); 1058 ti += tgsi_build_full_declaration( 1059 &fulldecl, 1060 &tokens[ti], 1061 header, 1062 maxTokens - ti ); 1063 1064 indirectAccess = GL_TRUE; 1065 } 1066 1067 /* immediates/literals */ 1068 memset(immediates, ~0, sizeof(immediates)); 1069 1070 /* Emit immediates only when there is no address register in use. 1071 * FIXME: Be smarter and recognize param arrays -- indirect addressing is 1072 * only valid within the referenced array. 1073 */ 1074 if (program->Parameters && !indirectAccess) { 1075 for (i = 0; i < program->Parameters->NumParameters; i++) { 1076 if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) { 1077 struct tgsi_full_immediate fullimm; 1078 1079 fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 ); 1080 ti += tgsi_build_full_immediate( 1081 &fullimm, 1082 &tokens[ti], 1083 header, 1084 maxTokens - ti ); 1085 immediates[i] = numImmediates; 1086 numImmediates++; 1087 } 1088 } 1089 } 1090 1091 /* constant buffer refs */ 1092 if (program->Parameters) { 1093 GLint start = -1, end = -1; 1094 1095 for (i = 0; i < program->Parameters->NumParameters; i++) { 1096 GLboolean emit = (i == program->Parameters->NumParameters - 1); 1097 GLboolean matches; 1098 1099 switch (program->Parameters->Parameters[i].Type) { 1100 case PROGRAM_ENV_PARAM: 1101 case PROGRAM_STATE_VAR: 1102 case PROGRAM_NAMED_PARAM: 1103 case PROGRAM_UNIFORM: 1104 matches = GL_TRUE; 1105 break; 1106 case PROGRAM_CONSTANT: 1107 matches = indirectAccess; 1108 break; 1109 default: 1110 matches = GL_FALSE; 1111 } 1112 1113 if (matches) { 1114 if (start == -1) { 1115 /* begin a sequence */ 1116 start = i; 1117 end = i; 1118 } 1119 else { 1120 /* continue sequence */ 1121 end = i; 1122 } 1123 } 1124 else { 1125 if (start != -1) { 1126 /* end of sequence */ 1127 emit = GL_TRUE; 1128 } 1129 } 1130 1131 if (emit && start >= 0) { 1132 struct tgsi_full_declaration fulldecl; 1133 1134 fulldecl = make_constant_decl( start, end ); 1135 ti += tgsi_build_full_declaration( 1136 &fulldecl, 1137 &tokens[ti], 1138 header, 1139 maxTokens - ti ); 1140 start = end = -1; 1141 } 1142 } 1143 } 1144 1145 /* texture samplers */ 1146 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 1147 if (program->SamplersUsed & (1 << i)) { 1148 struct tgsi_full_declaration fulldecl; 1149 1150 fulldecl = make_sampler_decl( i ); 1151 ti += tgsi_build_full_declaration( 1152 &fulldecl, 1153 &tokens[ti], 1154 header, 1155 maxTokens - ti ); 1156 } 1157 } 1158 1159 /* invert WPOS fragment input */ 1160 if (wposTemp >= 0) { 1161 ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst, 1162 inputMapping[FRAG_ATTRIB_WPOS], 1163 header, maxTokens - ti); 1164 preamble_size = 2; /* two instructions added */ 1165 } 1166 1167 for (i = 0; i < program->NumInstructions; i++) { 1168 struct tgsi_full_instruction fullinst; 1169 1170 compile_instruction( 1171 &program->Instructions[i], 1172 &fullinst, 1173 inputMapping, 1174 outputMapping, 1175 immediates, 1176 indirectAccess, 1177 preamble_size, 1178 procType, 1179 &insideSubroutine, 1180 wposTemp); 1181 1182 ti += tgsi_build_full_instruction( 1183 &fullinst, 1184 &tokens[ti], 1185 header, 1186 maxTokens - ti ); 1187 } 1188 1189#if DEBUG 1190 if(!tgsi_sanity_check(tokens)) { 1191 debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n"); 1192 debug_printf("\nOriginal program:\n%s", program->String); 1193 debug_printf("\nMesa program:\n"); 1194 _mesa_print_program(program); 1195 debug_printf("\nTGSI program:\n"); 1196 tgsi_dump(tokens, 0); 1197 assert(0); 1198 } 1199#endif 1200 1201 return ti; 1202} 1203