brw_wm_fp.c revision 9e7903e492ad842481a166484e0474dd4f3100ba
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "main/glheader.h" 34#include "main/macros.h" 35#include "main/enums.h" 36#include "brw_context.h" 37#include "brw_wm.h" 38#include "brw_util.h" 39 40#include "shader/prog_parameter.h" 41#include "shader/prog_print.h" 42#include "shader/prog_statevars.h" 43 44 45#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS 46 47#define X 0 48#define Y 1 49#define Z 2 50#define W 3 51 52 53static const char *wm_opcode_strings[] = { 54 "PIXELXY", 55 "DELTAXY", 56 "PIXELW", 57 "LINTERP", 58 "PINTERP", 59 "CINTERP", 60 "WPOSXY", 61 "FB_WRITE" 62}; 63 64#if 0 65static const char *wm_file_strings[] = { 66 "PAYLOAD" 67}; 68#endif 69 70 71/*********************************************************************** 72 * Source regs 73 */ 74 75static struct prog_src_register src_reg(GLuint file, GLuint idx) 76{ 77 struct prog_src_register reg; 78 reg.File = file; 79 reg.Index = idx; 80 reg.Swizzle = SWIZZLE_NOOP; 81 reg.RelAddr = 0; 82 reg.NegateBase = 0; 83 reg.Abs = 0; 84 reg.NegateAbs = 0; 85 return reg; 86} 87 88static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) 89{ 90 return src_reg(dst.File, dst.Index); 91} 92 93static struct prog_src_register src_undef( void ) 94{ 95 return src_reg(PROGRAM_UNDEFINED, 0); 96} 97 98static GLboolean src_is_undef(struct prog_src_register src) 99{ 100 return src.File == PROGRAM_UNDEFINED; 101} 102 103static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) 104{ 105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); 106 return reg; 107} 108 109static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) 110{ 111 return src_swizzle(reg, x, x, x, x); 112} 113 114 115/*********************************************************************** 116 * Dest regs 117 */ 118 119static struct prog_dst_register dst_reg(GLuint file, GLuint idx) 120{ 121 struct prog_dst_register reg; 122 reg.File = file; 123 reg.Index = idx; 124 reg.WriteMask = WRITEMASK_XYZW; 125 reg.RelAddr = 0; 126 reg.CondMask = 0; 127 reg.CondSwizzle = 0; 128 reg.CondSrc = 0; 129 reg.pad = 0; 130 return reg; 131} 132 133static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) 134{ 135 reg.WriteMask &= mask; 136 return reg; 137} 138 139static struct prog_dst_register dst_undef( void ) 140{ 141 return dst_reg(PROGRAM_UNDEFINED, 0); 142} 143 144 145 146static struct prog_dst_register get_temp( struct brw_wm_compile *c ) 147{ 148 int bit = _mesa_ffs( ~c->fp_temp ); 149 150 if (!bit) { 151 _mesa_printf("%s: out of temporaries\n", __FILE__); 152 exit(1); 153 } 154 155 c->fp_temp |= 1<<(bit-1); 156 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); 157} 158 159 160static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) 161{ 162 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); 163} 164 165 166/*********************************************************************** 167 * Instructions 168 */ 169 170static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) 171{ 172 return &c->prog_instructions[c->nr_fp_insns++]; 173} 174 175static struct prog_instruction *emit_insn(struct brw_wm_compile *c, 176 const struct prog_instruction *inst0) 177{ 178 struct prog_instruction *inst = get_fp_inst(c); 179 *inst = *inst0; 180 inst->Data = (void *)inst0; 181 return inst; 182} 183 184static struct prog_instruction * emit_op(struct brw_wm_compile *c, 185 GLuint op, 186 struct prog_dst_register dest, 187 GLuint saturate, 188 GLuint tex_src_unit, 189 GLuint tex_src_target, 190 struct prog_src_register src0, 191 struct prog_src_register src1, 192 struct prog_src_register src2 ) 193{ 194 struct prog_instruction *inst = get_fp_inst(c); 195 196 memset(inst, 0, sizeof(*inst)); 197 198 inst->Opcode = op; 199 inst->DstReg = dest; 200 inst->SaturateMode = saturate; 201 inst->TexSrcUnit = tex_src_unit; 202 inst->TexSrcTarget = tex_src_target; 203 inst->SrcReg[0] = src0; 204 inst->SrcReg[1] = src1; 205 inst->SrcReg[2] = src2; 206 return inst; 207} 208 209 210 211 212/*********************************************************************** 213 * Special instructions for interpolation and other tasks 214 */ 215 216static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) 217{ 218 if (src_is_undef(c->pixel_xy)) { 219 struct prog_dst_register pixel_xy = get_temp(c); 220 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 221 222 223 /* Emit the out calculations, and hold onto the results. Use 224 * two instructions as a temporary is required. 225 */ 226 /* pixel_xy.xy = PIXELXY payload[0]; 227 */ 228 emit_op(c, 229 WM_PIXELXY, 230 dst_mask(pixel_xy, WRITEMASK_XY), 231 0, 0, 0, 232 payload_r0_depth, 233 src_undef(), 234 src_undef()); 235 236 c->pixel_xy = src_reg_from_dst(pixel_xy); 237 } 238 239 return c->pixel_xy; 240} 241 242static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) 243{ 244 if (src_is_undef(c->delta_xy)) { 245 struct prog_dst_register delta_xy = get_temp(c); 246 struct prog_src_register pixel_xy = get_pixel_xy(c); 247 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 248 249 /* deltas.xy = DELTAXY pixel_xy, payload[0] 250 */ 251 emit_op(c, 252 WM_DELTAXY, 253 dst_mask(delta_xy, WRITEMASK_XY), 254 0, 0, 0, 255 pixel_xy, 256 payload_r0_depth, 257 src_undef()); 258 259 c->delta_xy = src_reg_from_dst(delta_xy); 260 } 261 262 return c->delta_xy; 263} 264 265static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) 266{ 267 if (src_is_undef(c->pixel_w)) { 268 struct prog_dst_register pixel_w = get_temp(c); 269 struct prog_src_register deltas = get_delta_xy(c); 270 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); 271 272 273 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x 274 */ 275 emit_op(c, 276 WM_PIXELW, 277 dst_mask(pixel_w, WRITEMASK_W), 278 0, 0, 0, 279 interp_wpos, 280 deltas, 281 src_undef()); 282 283 284 c->pixel_w = src_reg_from_dst(pixel_w); 285 } 286 287 return c->pixel_w; 288} 289 290static void emit_interp( struct brw_wm_compile *c, 291 GLuint idx ) 292{ 293 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); 294 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); 295 struct prog_src_register deltas = get_delta_xy(c); 296 struct prog_src_register arg2; 297 GLuint opcode; 298 299 /* Need to use PINTERP on attributes which have been 300 * multiplied by 1/W in the SF program, and LINTERP on those 301 * which have not: 302 */ 303 switch (idx) { 304 case FRAG_ATTRIB_WPOS: 305 opcode = WM_LINTERP; 306 arg2 = src_undef(); 307 308 /* Have to treat wpos.xy specially: 309 */ 310 emit_op(c, 311 WM_WPOSXY, 312 dst_mask(dst, WRITEMASK_XY), 313 0, 0, 0, 314 get_pixel_xy(c), 315 src_undef(), 316 src_undef()); 317 318 dst = dst_mask(dst, WRITEMASK_ZW); 319 320 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw 321 */ 322 emit_op(c, 323 WM_LINTERP, 324 dst, 325 0, 0, 0, 326 interp, 327 deltas, 328 arg2); 329 break; 330 case FRAG_ATTRIB_COL0: 331 case FRAG_ATTRIB_COL1: 332 if (c->key.flat_shade) { 333 emit_op(c, 334 WM_CINTERP, 335 dst, 336 0, 0, 0, 337 interp, 338 src_undef(), 339 src_undef()); 340 } 341 else { 342 emit_op(c, 343 WM_LINTERP, 344 dst, 345 0, 0, 0, 346 interp, 347 deltas, 348 src_undef()); 349 } 350 break; 351 default: 352 emit_op(c, 353 WM_PINTERP, 354 dst, 355 0, 0, 0, 356 interp, 357 deltas, 358 get_pixel_w(c)); 359 break; 360 } 361 362 c->fp_interp_emitted |= 1<<idx; 363} 364 365static void emit_ddx( struct brw_wm_compile *c, 366 const struct prog_instruction *inst ) 367{ 368 GLuint idx = inst->SrcReg[0].Index; 369 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); 370 371 c->fp_deriv_emitted |= 1<<idx; 372 emit_op(c, 373 OPCODE_DDX, 374 inst->DstReg, 375 0, 0, 0, 376 interp, 377 get_pixel_w(c), 378 src_undef()); 379} 380 381static void emit_ddy( struct brw_wm_compile *c, 382 const struct prog_instruction *inst ) 383{ 384 GLuint idx = inst->SrcReg[0].Index; 385 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); 386 387 c->fp_deriv_emitted |= 1<<idx; 388 emit_op(c, 389 OPCODE_DDY, 390 inst->DstReg, 391 0, 0, 0, 392 interp, 393 get_pixel_w(c), 394 src_undef()); 395} 396 397/*********************************************************************** 398 * Hacks to extend the program parameter and constant lists. 399 */ 400 401/* Add the fog parameters to the parameter list of the original 402 * program, rather than creating a new list. Doesn't really do any 403 * harm and it's not as if the parameter handling isn't a big hack 404 * anyway. 405 */ 406static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 407 GLint s0, 408 GLint s1, 409 GLint s2, 410 GLint s3, 411 GLint s4) 412{ 413 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 414 gl_state_index tokens[STATE_LENGTH]; 415 GLuint idx; 416 tokens[0] = s0; 417 tokens[1] = s1; 418 tokens[2] = s2; 419 tokens[3] = s3; 420 tokens[4] = s4; 421 422 for (idx = 0; idx < paramList->NumParameters; idx++) { 423 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && 424 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) 425 return src_reg(PROGRAM_STATE_VAR, idx); 426 } 427 428 idx = _mesa_add_state_reference( paramList, tokens ); 429 430 return src_reg(PROGRAM_STATE_VAR, idx); 431} 432 433 434static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 435 GLfloat s0, 436 GLfloat s1, 437 GLfloat s2, 438 GLfloat s3) 439{ 440 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 441 GLfloat values[4]; 442 GLuint idx; 443 GLuint swizzle; 444 445 values[0] = s0; 446 values[1] = s1; 447 values[2] = s2; 448 values[3] = s3; 449 450 /* Have to search, otherwise multiple compilations will each grow 451 * the parameter list. 452 */ 453 for (idx = 0; idx < paramList->NumParameters; idx++) { 454 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && 455 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) 456 457 /* XXX: this mimics the mesa bug which puts all constants and 458 * parameters into the "PROGRAM_STATE_VAR" category: 459 */ 460 return src_reg(PROGRAM_STATE_VAR, idx); 461 } 462 463 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); 464 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ 465 return src_reg(PROGRAM_STATE_VAR, idx); 466} 467 468 469 470/*********************************************************************** 471 * Expand various instructions here to simpler forms. 472 */ 473static void precalc_dst( struct brw_wm_compile *c, 474 const struct prog_instruction *inst ) 475{ 476 struct prog_src_register src0 = inst->SrcReg[0]; 477 struct prog_src_register src1 = inst->SrcReg[1]; 478 struct prog_dst_register dst = inst->DstReg; 479 480 if (dst.WriteMask & WRITEMASK_Y) { 481 /* dst.y = mul src0.y, src1.y 482 */ 483 emit_op(c, 484 OPCODE_MUL, 485 dst_mask(dst, WRITEMASK_Y), 486 inst->SaturateMode, 0, 0, 487 src0, 488 src1, 489 src_undef()); 490 } 491 492 493 if (dst.WriteMask & WRITEMASK_XZ) { 494 struct prog_instruction *swz; 495 GLuint z = GET_SWZ(src0.Swizzle, Z); 496 497 /* dst.xz = swz src0.1zzz 498 */ 499 swz = emit_op(c, 500 OPCODE_SWZ, 501 dst_mask(dst, WRITEMASK_XZ), 502 inst->SaturateMode, 0, 0, 503 src_swizzle(src0, SWIZZLE_ONE, z, z, z), 504 src_undef(), 505 src_undef()); 506 /* Avoid letting negation flag of src0 affect our 1 constant. */ 507 swz->SrcReg[0].NegateBase &= ~NEGATE_X; 508 } 509 if (dst.WriteMask & WRITEMASK_W) { 510 /* dst.w = mov src1.w 511 */ 512 emit_op(c, 513 OPCODE_MOV, 514 dst_mask(dst, WRITEMASK_W), 515 inst->SaturateMode, 0, 0, 516 src1, 517 src_undef(), 518 src_undef()); 519 } 520} 521 522 523static void precalc_lit( struct brw_wm_compile *c, 524 const struct prog_instruction *inst ) 525{ 526 struct prog_src_register src0 = inst->SrcReg[0]; 527 struct prog_dst_register dst = inst->DstReg; 528 529 if (dst.WriteMask & WRITEMASK_XW) { 530 struct prog_instruction *swz; 531 532 /* dst.xw = swz src0.1111 533 */ 534 swz = emit_op(c, 535 OPCODE_SWZ, 536 dst_mask(dst, WRITEMASK_XW), 537 0, 0, 0, 538 src_swizzle1(src0, SWIZZLE_ONE), 539 src_undef(), 540 src_undef()); 541 /* Avoid letting the negation flag of src0 affect our 1 constant. */ 542 swz->SrcReg[0].NegateBase = 0; 543 } 544 545 546 if (dst.WriteMask & WRITEMASK_YZ) { 547 emit_op(c, 548 OPCODE_LIT, 549 dst_mask(dst, WRITEMASK_YZ), 550 inst->SaturateMode, 0, 0, 551 src0, 552 src_undef(), 553 src_undef()); 554 } 555} 556 557 558/** 559 * Some TEX instructions require extra code, cube map coordinate 560 * normalization, or coordinate scaling for RECT textures, etc. 561 * This function emits those extra instructions and the TEX 562 * instruction itself. 563 */ 564static void precalc_tex( struct brw_wm_compile *c, 565 const struct prog_instruction *inst ) 566{ 567 struct prog_src_register coord; 568 struct prog_dst_register tmpcoord; 569 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 570 571 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { 572 struct prog_instruction *out; 573 struct prog_dst_register tmp0 = get_temp(c); 574 struct prog_src_register tmp0src = src_reg_from_dst(tmp0); 575 struct prog_dst_register tmp1 = get_temp(c); 576 struct prog_src_register tmp1src = src_reg_from_dst(tmp1); 577 struct prog_src_register src0 = inst->SrcReg[0]; 578 579 /* find longest component of coord vector and normalize it */ 580 tmpcoord = get_temp(c); 581 coord = src_reg_from_dst(tmpcoord); 582 583 /* tmpcoord = src0 (i.e.: coord = src0) */ 584 out = emit_op(c, OPCODE_MOV, 585 tmpcoord, 586 0, 0, 0, 587 src0, 588 src_undef(), 589 src_undef()); 590 out->SrcReg[0].NegateBase = 0; 591 out->SrcReg[0].Abs = 1; 592 593 /* tmp0 = MAX(coord.X, coord.Y) */ 594 emit_op(c, OPCODE_MAX, 595 tmp0, 596 0, 0, 0, 597 src_swizzle1(coord, X), 598 src_swizzle1(coord, Y), 599 src_undef()); 600 601 /* tmp1 = MAX(tmp0, coord.Z) */ 602 emit_op(c, OPCODE_MAX, 603 tmp1, 604 0, 0, 0, 605 tmp0src, 606 src_swizzle1(coord, Z), 607 src_undef()); 608 609 /* tmp0 = 1 / tmp1 */ 610 emit_op(c, OPCODE_RCP, 611 tmp0, 612 0, 0, 0, 613 tmp1src, 614 src_undef(), 615 src_undef()); 616 617 /* tmpCoord = src0 * tmp0 */ 618 emit_op(c, OPCODE_MUL, 619 tmpcoord, 620 0, 0, 0, 621 src0, 622 tmp0src, 623 src_undef()); 624 625 release_temp(c, tmp0); 626 release_temp(c, tmp1); 627 } 628 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { 629 struct prog_src_register scale = 630 search_or_add_param5( c, 631 STATE_INTERNAL, 632 STATE_TEXRECT_SCALE, 633 unit, 634 0,0 ); 635 636 tmpcoord = get_temp(c); 637 638 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } 639 */ 640 emit_op(c, 641 OPCODE_MUL, 642 tmpcoord, 643 0, 0, 0, 644 inst->SrcReg[0], 645 scale, 646 src_undef()); 647 648 coord = src_reg_from_dst(tmpcoord); 649 } 650 else { 651 coord = inst->SrcReg[0]; 652 } 653 654 /* Need to emit YUV texture conversions by hand. Probably need to 655 * do this here - the alternative is in brw_wm_emit.c, but the 656 * conversion requires allocating a temporary variable which we 657 * don't have the facility to do that late in the compilation. 658 */ 659 if (c->key.yuvtex_mask & (1 << unit)) { 660 /* convert ycbcr to RGBA */ 661 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); 662 663 /* 664 CONST C0 = { -.5, -.0625, -.5, 1.164 } 665 CONST C1 = { 1.596, -0.813, 2.018, -.391 } 666 UYV = TEX ... 667 UYV.xyz = ADD UYV, C0 668 UYV.y = MUL UYV.y, C0.w 669 if (UV swaped) 670 RGB.xyz = MAD UYV.zzx, C1, UYV.y 671 else 672 RGB.xyz = MAD UYV.xxz, C1, UYV.y 673 RGB.y = MAD UYV.z, C1.w, RGB.y 674 */ 675 struct prog_dst_register dst = inst->DstReg; 676 struct prog_dst_register tmp = get_temp(c); 677 struct prog_src_register tmpsrc = src_reg_from_dst(tmp); 678 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); 679 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); 680 681 /* tmp = TEX ... 682 */ 683 emit_op(c, 684 OPCODE_TEX, 685 tmp, 686 inst->SaturateMode, 687 unit, 688 inst->TexSrcTarget, 689 coord, 690 src_undef(), 691 src_undef()); 692 693 /* tmp.xyz = ADD TMP, C0 694 */ 695 emit_op(c, 696 OPCODE_ADD, 697 dst_mask(tmp, WRITEMASK_XYZ), 698 0, 0, 0, 699 tmpsrc, 700 C0, 701 src_undef()); 702 703 /* YUV.y = MUL YUV.y, C0.w 704 */ 705 706 emit_op(c, 707 OPCODE_MUL, 708 dst_mask(tmp, WRITEMASK_Y), 709 0, 0, 0, 710 tmpsrc, 711 src_swizzle1(C0, W), 712 src_undef()); 713 714 /* 715 * if (UV swaped) 716 * RGB.xyz = MAD YUV.zzx, C1, YUV.y 717 * else 718 * RGB.xyz = MAD YUV.xxz, C1, YUV.y 719 */ 720 721 emit_op(c, 722 OPCODE_MAD, 723 dst_mask(dst, WRITEMASK_XYZ), 724 0, 0, 0, 725 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), 726 C1, 727 src_swizzle1(tmpsrc, Y)); 728 729 /* RGB.y = MAD YUV.z, C1.w, RGB.y 730 */ 731 emit_op(c, 732 OPCODE_MAD, 733 dst_mask(dst, WRITEMASK_Y), 734 0, 0, 0, 735 src_swizzle1(tmpsrc, Z), 736 src_swizzle1(C1, W), 737 src_swizzle1(src_reg_from_dst(dst), Y)); 738 739 release_temp(c, tmp); 740 } 741 else { 742 /* ordinary RGBA tex instruction */ 743 emit_op(c, 744 OPCODE_TEX, 745 inst->DstReg, 746 inst->SaturateMode, 747 unit, 748 inst->TexSrcTarget, 749 coord, 750 src_undef(), 751 src_undef()); 752 } 753 754 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || 755 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) 756 release_temp(c, tmpcoord); 757} 758 759 760static GLboolean projtex( struct brw_wm_compile *c, 761 const struct prog_instruction *inst ) 762{ 763 struct prog_src_register src = inst->SrcReg[0]; 764 765 /* Only try to detect the simplest cases. Could detect (later) 766 * cases where we are trying to emit code like RCP {1.0}, MUL x, 767 * {1.0}, and so on. 768 * 769 * More complex cases than this typically only arise from 770 * user-provided fragment programs anyway: 771 */ 772 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) 773 return 0; /* ut2004 gun rendering !?! */ 774 else if (src.File == PROGRAM_INPUT && 775 GET_SWZ(src.Swizzle, W) == W && 776 (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) 777 return 0; 778 else 779 return 1; 780} 781 782 783static void precalc_txp( struct brw_wm_compile *c, 784 const struct prog_instruction *inst ) 785{ 786 struct prog_src_register src0 = inst->SrcReg[0]; 787 788 if (projtex(c, inst)) { 789 struct prog_dst_register tmp = get_temp(c); 790 struct prog_instruction tmp_inst; 791 792 /* tmp0.w = RCP inst.arg[0][3] 793 */ 794 emit_op(c, 795 OPCODE_RCP, 796 dst_mask(tmp, WRITEMASK_W), 797 0, 0, 0, 798 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), 799 src_undef(), 800 src_undef()); 801 802 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww 803 */ 804 emit_op(c, 805 OPCODE_MUL, 806 dst_mask(tmp, WRITEMASK_XYZ), 807 0, 0, 0, 808 src0, 809 src_swizzle1(src_reg_from_dst(tmp), W), 810 src_undef()); 811 812 /* dst = precalc(TEX tmp0) 813 */ 814 tmp_inst = *inst; 815 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); 816 precalc_tex(c, &tmp_inst); 817 818 release_temp(c, tmp); 819 } 820 else 821 { 822 /* dst = precalc(TEX src0) 823 */ 824 precalc_tex(c, inst); 825 } 826} 827 828 829 830static void emit_fb_write( struct brw_wm_compile *c ) 831{ 832 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 833 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); 834 struct prog_src_register outcolor; 835 GLuint i; 836 837 struct prog_instruction *inst, *last_inst; 838 struct brw_context *brw = c->func.brw; 839 840 /* inst->Sampler is not used by backend, 841 use it for fb write target and eot */ 842 843 if (brw->state.nr_draw_regions > 1) { 844 for (i = 0 ; i < brw->state.nr_draw_regions; i++) { 845 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); 846 last_inst = inst = emit_op(c, 847 WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0, 848 outcolor, payload_r0_depth, outdepth); 849 inst->Sampler = (i<<1); 850 if (c->fp_fragcolor_emitted) { 851 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); 852 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 853 0, 0, 0, outcolor, payload_r0_depth, outdepth); 854 inst->Sampler = (i<<1); 855 } 856 } 857 last_inst->Sampler |= 1; //eot 858 } 859 else { 860 /* if gl_FragData[0] is written, use it, else use gl_FragColor */ 861 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) 862 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); 863 else 864 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); 865 866 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 867 0, 0, 0, outcolor, payload_r0_depth, outdepth); 868 inst->Sampler = 1|(0<<1); 869 } 870} 871 872 873 874 875/*********************************************************************** 876 * Emit INTERP instructions ahead of first use of each attrib. 877 */ 878 879static void validate_src_regs( struct brw_wm_compile *c, 880 const struct prog_instruction *inst ) 881{ 882 GLuint nr_args = brw_wm_nr_args( inst->Opcode ); 883 GLuint i; 884 885 for (i = 0; i < nr_args; i++) { 886 if (inst->SrcReg[i].File == PROGRAM_INPUT) { 887 GLuint idx = inst->SrcReg[i].Index; 888 if (!(c->fp_interp_emitted & (1<<idx))) { 889 emit_interp(c, idx); 890 } 891 } 892 } 893} 894 895static void validate_dst_regs( struct brw_wm_compile *c, 896 const struct prog_instruction *inst ) 897{ 898 if (inst->DstReg.File == PROGRAM_OUTPUT) { 899 GLuint idx = inst->DstReg.Index; 900 if (idx == FRAG_RESULT_COLR) 901 c->fp_fragcolor_emitted = 1; 902 } 903} 904 905static void print_insns( const struct prog_instruction *insn, 906 GLuint nr ) 907{ 908 GLuint i; 909 for (i = 0; i < nr; i++, insn++) { 910 _mesa_printf("%3d: ", i); 911 if (insn->Opcode < MAX_OPCODE) 912 _mesa_print_instruction(insn); 913 else if (insn->Opcode < MAX_WM_OPCODE) { 914 GLuint idx = insn->Opcode - MAX_OPCODE; 915 916 _mesa_print_alu_instruction(insn, 917 wm_opcode_strings[idx], 918 3); 919 } 920 else 921 _mesa_printf("UNKNOWN\n"); 922 923 } 924} 925 926void brw_wm_pass_fp( struct brw_wm_compile *c ) 927{ 928 struct brw_fragment_program *fp = c->fp; 929 GLuint insn; 930 931 if (INTEL_DEBUG & DEBUG_WM) { 932 _mesa_printf("pre-fp:\n"); 933 _mesa_print_program(&fp->program.Base); 934 _mesa_printf("\n"); 935 } 936 937 c->pixel_xy = src_undef(); 938 c->delta_xy = src_undef(); 939 c->pixel_w = src_undef(); 940 c->nr_fp_insns = 0; 941 942 /* Emit preamble instructions: 943 */ 944 945 946 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 947 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 948 validate_src_regs(c, inst); 949 validate_dst_regs(c, inst); 950 } 951 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 952 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 953 struct prog_instruction *out; 954 955 /* Check for INPUT values, emit INTERP instructions where 956 * necessary: 957 */ 958 959 960 switch (inst->Opcode) { 961 case OPCODE_SWZ: 962 out = emit_insn(c, inst); 963 out->Opcode = OPCODE_MOV; 964 break; 965 966 case OPCODE_ABS: 967 out = emit_insn(c, inst); 968 out->Opcode = OPCODE_MOV; 969 out->SrcReg[0].NegateBase = 0; 970 out->SrcReg[0].Abs = 1; 971 break; 972 973 case OPCODE_SUB: 974 out = emit_insn(c, inst); 975 out->Opcode = OPCODE_ADD; 976 out->SrcReg[1].NegateBase ^= 0xf; 977 break; 978 979 case OPCODE_SCS: 980 out = emit_insn(c, inst); 981 /* This should probably be done in the parser. 982 */ 983 out->DstReg.WriteMask &= WRITEMASK_XY; 984 break; 985 986 case OPCODE_DST: 987 precalc_dst(c, inst); 988 break; 989 990 case OPCODE_LIT: 991 precalc_lit(c, inst); 992 break; 993 994 case OPCODE_TEX: 995 precalc_tex(c, inst); 996 break; 997 998 case OPCODE_TXP: 999 precalc_txp(c, inst); 1000 break; 1001 1002 case OPCODE_TXB: 1003 out = emit_insn(c, inst); 1004 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 1005 break; 1006 1007 case OPCODE_XPD: 1008 out = emit_insn(c, inst); 1009 /* This should probably be done in the parser. 1010 */ 1011 out->DstReg.WriteMask &= WRITEMASK_XYZ; 1012 break; 1013 1014 case OPCODE_KIL: 1015 out = emit_insn(c, inst); 1016 /* This should probably be done in the parser. 1017 */ 1018 out->DstReg.WriteMask = 0; 1019 break; 1020 case OPCODE_DDX: 1021 emit_ddx(c, inst); 1022 break; 1023 case OPCODE_DDY: 1024 emit_ddy(c, inst); 1025 break; 1026 case OPCODE_END: 1027 emit_fb_write(c); 1028 break; 1029 case OPCODE_PRINT: 1030 break; 1031 1032 default: 1033 emit_insn(c, inst); 1034 break; 1035 } 1036 } 1037 1038 if (INTEL_DEBUG & DEBUG_WM) { 1039 _mesa_printf("pass_fp:\n"); 1040 print_insns( c->prog_instructions, c->nr_fp_insns ); 1041 _mesa_printf("\n"); 1042 } 1043} 1044 1045