brw_wm_fp.c revision cfa927766ab610a9a76730d337d77008d876ebbd
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "main/glheader.h" 34#include "main/macros.h" 35#include "main/enums.h" 36#include "brw_context.h" 37#include "brw_wm.h" 38#include "brw_util.h" 39 40#include "shader/prog_parameter.h" 41#include "shader/prog_print.h" 42#include "shader/prog_statevars.h" 43 44 45/** An invalid texture target */ 46#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS 47 48/** An invalid texture unit */ 49#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT 50 51#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS 52 53#define X 0 54#define Y 1 55#define Z 2 56#define W 3 57 58 59static const char *wm_opcode_strings[] = { 60 "PIXELXY", 61 "DELTAXY", 62 "PIXELW", 63 "LINTERP", 64 "PINTERP", 65 "CINTERP", 66 "WPOSXY", 67 "FB_WRITE", 68 "FRONTFACING", 69}; 70 71#if 0 72static const char *wm_file_strings[] = { 73 "PAYLOAD" 74}; 75#endif 76 77 78/*********************************************************************** 79 * Source regs 80 */ 81 82static struct prog_src_register src_reg(GLuint file, GLuint idx) 83{ 84 struct prog_src_register reg; 85 reg.File = file; 86 reg.Index = idx; 87 reg.Swizzle = SWIZZLE_NOOP; 88 reg.RelAddr = 0; 89 reg.Negate = NEGATE_NONE; 90 reg.Abs = 0; 91 return reg; 92} 93 94static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) 95{ 96 return src_reg(dst.File, dst.Index); 97} 98 99static struct prog_src_register src_undef( void ) 100{ 101 return src_reg(PROGRAM_UNDEFINED, 0); 102} 103 104static GLboolean src_is_undef(struct prog_src_register src) 105{ 106 return src.File == PROGRAM_UNDEFINED; 107} 108 109static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) 110{ 111 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); 112 return reg; 113} 114 115static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) 116{ 117 return src_swizzle(reg, x, x, x, x); 118} 119 120static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) 121{ 122 reg.Swizzle = swizzle; 123 return reg; 124} 125 126 127/*********************************************************************** 128 * Dest regs 129 */ 130 131static struct prog_dst_register dst_reg(GLuint file, GLuint idx) 132{ 133 struct prog_dst_register reg; 134 reg.File = file; 135 reg.Index = idx; 136 reg.WriteMask = WRITEMASK_XYZW; 137 reg.RelAddr = 0; 138 reg.CondMask = COND_TR; 139 reg.CondSwizzle = 0; 140 reg.CondSrc = 0; 141 reg.pad = 0; 142 return reg; 143} 144 145static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) 146{ 147 reg.WriteMask &= mask; 148 return reg; 149} 150 151static struct prog_dst_register dst_undef( void ) 152{ 153 return dst_reg(PROGRAM_UNDEFINED, 0); 154} 155 156 157 158static struct prog_dst_register get_temp( struct brw_wm_compile *c ) 159{ 160 int bit = _mesa_ffs( ~c->fp_temp ); 161 162 if (!bit) { 163 _mesa_printf("%s: out of temporaries\n", __FILE__); 164 exit(1); 165 } 166 167 c->fp_temp |= 1<<(bit-1); 168 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); 169} 170 171 172static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) 173{ 174 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); 175} 176 177 178/*********************************************************************** 179 * Instructions 180 */ 181 182static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) 183{ 184 assert(c->nr_fp_insns < BRW_WM_MAX_INSN); 185 return &c->prog_instructions[c->nr_fp_insns++]; 186} 187 188static struct prog_instruction *emit_insn(struct brw_wm_compile *c, 189 const struct prog_instruction *inst0) 190{ 191 struct prog_instruction *inst = get_fp_inst(c); 192 *inst = *inst0; 193 return inst; 194} 195 196static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, 197 GLuint op, 198 struct prog_dst_register dest, 199 GLuint saturate, 200 GLuint tex_src_unit, 201 GLuint tex_src_target, 202 GLuint tex_shadow, 203 struct prog_src_register src0, 204 struct prog_src_register src1, 205 struct prog_src_register src2 ) 206{ 207 struct prog_instruction *inst = get_fp_inst(c); 208 209 assert(tex_src_unit < BRW_MAX_TEX_UNIT || 210 tex_src_unit == TEX_UNIT_NONE); 211 assert(tex_src_target < NUM_TEXTURE_TARGETS || 212 tex_src_target == TEX_TARGET_NONE); 213 214 /* update mask of which texture units are referenced by this program */ 215 if (tex_src_unit != TEX_UNIT_NONE) 216 c->fp->tex_units_used |= (1 << tex_src_unit); 217 218 memset(inst, 0, sizeof(*inst)); 219 220 inst->Opcode = op; 221 inst->DstReg = dest; 222 inst->SaturateMode = saturate; 223 inst->TexSrcUnit = tex_src_unit; 224 inst->TexSrcTarget = tex_src_target; 225 inst->TexShadow = tex_shadow; 226 inst->SrcReg[0] = src0; 227 inst->SrcReg[1] = src1; 228 inst->SrcReg[2] = src2; 229 return inst; 230} 231 232 233static struct prog_instruction * emit_op(struct brw_wm_compile *c, 234 GLuint op, 235 struct prog_dst_register dest, 236 GLuint saturate, 237 struct prog_src_register src0, 238 struct prog_src_register src1, 239 struct prog_src_register src2 ) 240{ 241 return emit_tex_op(c, op, dest, saturate, 242 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ 243 src0, src1, src2); 244} 245 246 247/* Many Mesa opcodes produce the same value across all the result channels. 248 * We'd rather not have to support that splatting in the opcode implementations, 249 * and brw_wm_pass*.c wants to optimize them out by shuffling references around 250 * anyway. We can easily get both by emitting the opcode to one channel, and 251 * then MOVing it to the others, which brw_wm_pass*.c already understands. 252 */ 253static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, 254 const struct prog_instruction *inst0) 255{ 256 struct prog_instruction *inst; 257 unsigned int dst_chan; 258 unsigned int other_channel_mask; 259 260 if (inst0->DstReg.WriteMask == 0) 261 return NULL; 262 263 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; 264 inst = get_fp_inst(c); 265 *inst = *inst0; 266 inst->DstReg.WriteMask = 1 << dst_chan; 267 268 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); 269 if (other_channel_mask != 0) { 270 inst = emit_op(c, 271 OPCODE_MOV, 272 dst_mask(inst0->DstReg, other_channel_mask), 273 0, 274 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), 275 src_undef(), 276 src_undef()); 277 } 278 return inst; 279} 280 281 282/*********************************************************************** 283 * Special instructions for interpolation and other tasks 284 */ 285 286static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) 287{ 288 if (src_is_undef(c->pixel_xy)) { 289 struct prog_dst_register pixel_xy = get_temp(c); 290 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 291 292 293 /* Emit the out calculations, and hold onto the results. Use 294 * two instructions as a temporary is required. 295 */ 296 /* pixel_xy.xy = PIXELXY payload[0]; 297 */ 298 emit_op(c, 299 WM_PIXELXY, 300 dst_mask(pixel_xy, WRITEMASK_XY), 301 0, 302 payload_r0_depth, 303 src_undef(), 304 src_undef()); 305 306 c->pixel_xy = src_reg_from_dst(pixel_xy); 307 } 308 309 return c->pixel_xy; 310} 311 312static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) 313{ 314 if (src_is_undef(c->delta_xy)) { 315 struct prog_dst_register delta_xy = get_temp(c); 316 struct prog_src_register pixel_xy = get_pixel_xy(c); 317 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 318 319 /* deltas.xy = DELTAXY pixel_xy, payload[0] 320 */ 321 emit_op(c, 322 WM_DELTAXY, 323 dst_mask(delta_xy, WRITEMASK_XY), 324 0, 325 pixel_xy, 326 payload_r0_depth, 327 src_undef()); 328 329 c->delta_xy = src_reg_from_dst(delta_xy); 330 } 331 332 return c->delta_xy; 333} 334 335static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) 336{ 337 if (src_is_undef(c->pixel_w)) { 338 struct prog_dst_register pixel_w = get_temp(c); 339 struct prog_src_register deltas = get_delta_xy(c); 340 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); 341 342 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x 343 */ 344 emit_op(c, 345 WM_PIXELW, 346 dst_mask(pixel_w, WRITEMASK_W), 347 0, 348 interp_wpos, 349 deltas, 350 src_undef()); 351 352 353 c->pixel_w = src_reg_from_dst(pixel_w); 354 } 355 356 return c->pixel_w; 357} 358 359static void emit_interp( struct brw_wm_compile *c, 360 GLuint idx ) 361{ 362 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); 363 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); 364 struct prog_src_register deltas = get_delta_xy(c); 365 366 /* Need to use PINTERP on attributes which have been 367 * multiplied by 1/W in the SF program, and LINTERP on those 368 * which have not: 369 */ 370 switch (idx) { 371 case FRAG_ATTRIB_WPOS: 372 /* Have to treat wpos.xy specially: 373 */ 374 emit_op(c, 375 WM_WPOSXY, 376 dst_mask(dst, WRITEMASK_XY), 377 0, 378 get_pixel_xy(c), 379 src_undef(), 380 src_undef()); 381 382 dst = dst_mask(dst, WRITEMASK_ZW); 383 384 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw 385 */ 386 emit_op(c, 387 WM_LINTERP, 388 dst, 389 0, 390 interp, 391 deltas, 392 src_undef()); 393 break; 394 case FRAG_ATTRIB_COL0: 395 case FRAG_ATTRIB_COL1: 396 if (c->key.flat_shade) { 397 emit_op(c, 398 WM_CINTERP, 399 dst, 400 0, 401 interp, 402 src_undef(), 403 src_undef()); 404 } 405 else { 406 if (c->key.linear_color) { 407 emit_op(c, 408 WM_LINTERP, 409 dst, 410 0, 411 interp, 412 deltas, 413 src_undef()); 414 } 415 else { 416 /* perspective-corrected color interpolation */ 417 emit_op(c, 418 WM_PINTERP, 419 dst, 420 0, 421 interp, 422 deltas, 423 get_pixel_w(c)); 424 } 425 } 426 break; 427 case FRAG_ATTRIB_FOGC: 428 /* Interpolate the fog coordinate */ 429 emit_op(c, 430 WM_PINTERP, 431 dst_mask(dst, WRITEMASK_X), 432 0, 433 interp, 434 deltas, 435 get_pixel_w(c)); 436 437 emit_op(c, 438 OPCODE_MOV, 439 dst_mask(dst, WRITEMASK_YZW), 440 0, 441 src_swizzle(interp, 442 SWIZZLE_ZERO, 443 SWIZZLE_ZERO, 444 SWIZZLE_ZERO, 445 SWIZZLE_ONE), 446 src_undef(), 447 src_undef()); 448 break; 449 450 case FRAG_ATTRIB_FACE: 451 emit_op(c, 452 WM_FRONTFACING, 453 dst_mask(dst, WRITEMASK_X), 454 0, 455 src_undef(), 456 src_undef(), 457 src_undef()); 458 break; 459 460 case FRAG_ATTRIB_PNTC: 461 /* XXX review/test this case */ 462 emit_op(c, 463 WM_PINTERP, 464 dst_mask(dst, WRITEMASK_XY), 465 0, 466 interp, 467 deltas, 468 get_pixel_w(c)); 469 470 emit_op(c, 471 OPCODE_MOV, 472 dst_mask(dst, WRITEMASK_ZW), 473 0, 474 src_swizzle(interp, 475 SWIZZLE_ZERO, 476 SWIZZLE_ZERO, 477 SWIZZLE_ZERO, 478 SWIZZLE_ONE), 479 src_undef(), 480 src_undef()); 481 break; 482 483 default: 484 emit_op(c, 485 WM_PINTERP, 486 dst, 487 0, 488 interp, 489 deltas, 490 get_pixel_w(c)); 491 break; 492 } 493 494 c->fp_interp_emitted |= 1<<idx; 495} 496 497/*********************************************************************** 498 * Hacks to extend the program parameter and constant lists. 499 */ 500 501/* Add the fog parameters to the parameter list of the original 502 * program, rather than creating a new list. Doesn't really do any 503 * harm and it's not as if the parameter handling isn't a big hack 504 * anyway. 505 */ 506static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 507 GLint s0, 508 GLint s1, 509 GLint s2, 510 GLint s3, 511 GLint s4) 512{ 513 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 514 gl_state_index tokens[STATE_LENGTH]; 515 GLuint idx; 516 tokens[0] = s0; 517 tokens[1] = s1; 518 tokens[2] = s2; 519 tokens[3] = s3; 520 tokens[4] = s4; 521 522 for (idx = 0; idx < paramList->NumParameters; idx++) { 523 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && 524 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) 525 return src_reg(PROGRAM_STATE_VAR, idx); 526 } 527 528 idx = _mesa_add_state_reference( paramList, tokens ); 529 530 return src_reg(PROGRAM_STATE_VAR, idx); 531} 532 533 534static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 535 GLfloat s0, 536 GLfloat s1, 537 GLfloat s2, 538 GLfloat s3) 539{ 540 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 541 GLfloat values[4]; 542 GLuint idx; 543 GLuint swizzle; 544 545 values[0] = s0; 546 values[1] = s1; 547 values[2] = s2; 548 values[3] = s3; 549 550 /* Have to search, otherwise multiple compilations will each grow 551 * the parameter list. 552 */ 553 for (idx = 0; idx < paramList->NumParameters; idx++) { 554 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && 555 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) 556 557 /* XXX: this mimics the mesa bug which puts all constants and 558 * parameters into the "PROGRAM_STATE_VAR" category: 559 */ 560 return src_reg(PROGRAM_STATE_VAR, idx); 561 } 562 563 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); 564 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ 565 return src_reg(PROGRAM_STATE_VAR, idx); 566} 567 568 569 570/*********************************************************************** 571 * Expand various instructions here to simpler forms. 572 */ 573static void precalc_dst( struct brw_wm_compile *c, 574 const struct prog_instruction *inst ) 575{ 576 struct prog_src_register src0 = inst->SrcReg[0]; 577 struct prog_src_register src1 = inst->SrcReg[1]; 578 struct prog_dst_register dst = inst->DstReg; 579 580 if (dst.WriteMask & WRITEMASK_Y) { 581 /* dst.y = mul src0.y, src1.y 582 */ 583 emit_op(c, 584 OPCODE_MUL, 585 dst_mask(dst, WRITEMASK_Y), 586 inst->SaturateMode, 587 src0, 588 src1, 589 src_undef()); 590 } 591 592 if (dst.WriteMask & WRITEMASK_XZ) { 593 struct prog_instruction *swz; 594 GLuint z = GET_SWZ(src0.Swizzle, Z); 595 596 /* dst.xz = swz src0.1zzz 597 */ 598 swz = emit_op(c, 599 OPCODE_SWZ, 600 dst_mask(dst, WRITEMASK_XZ), 601 inst->SaturateMode, 602 src_swizzle(src0, SWIZZLE_ONE, z, z, z), 603 src_undef(), 604 src_undef()); 605 /* Avoid letting negation flag of src0 affect our 1 constant. */ 606 swz->SrcReg[0].Negate &= ~NEGATE_X; 607 } 608 if (dst.WriteMask & WRITEMASK_W) { 609 /* dst.w = mov src1.w 610 */ 611 emit_op(c, 612 OPCODE_MOV, 613 dst_mask(dst, WRITEMASK_W), 614 inst->SaturateMode, 615 src1, 616 src_undef(), 617 src_undef()); 618 } 619} 620 621 622static void precalc_lit( struct brw_wm_compile *c, 623 const struct prog_instruction *inst ) 624{ 625 struct prog_src_register src0 = inst->SrcReg[0]; 626 struct prog_dst_register dst = inst->DstReg; 627 628 if (dst.WriteMask & WRITEMASK_XW) { 629 struct prog_instruction *swz; 630 631 /* dst.xw = swz src0.1111 632 */ 633 swz = emit_op(c, 634 OPCODE_SWZ, 635 dst_mask(dst, WRITEMASK_XW), 636 0, 637 src_swizzle1(src0, SWIZZLE_ONE), 638 src_undef(), 639 src_undef()); 640 /* Avoid letting the negation flag of src0 affect our 1 constant. */ 641 swz->SrcReg[0].Negate = NEGATE_NONE; 642 } 643 644 if (dst.WriteMask & WRITEMASK_YZ) { 645 emit_op(c, 646 OPCODE_LIT, 647 dst_mask(dst, WRITEMASK_YZ), 648 inst->SaturateMode, 649 src0, 650 src_undef(), 651 src_undef()); 652 } 653} 654 655 656/** 657 * Some TEX instructions require extra code, cube map coordinate 658 * normalization, or coordinate scaling for RECT textures, etc. 659 * This function emits those extra instructions and the TEX 660 * instruction itself. 661 */ 662static void precalc_tex( struct brw_wm_compile *c, 663 const struct prog_instruction *inst ) 664{ 665 struct prog_src_register coord; 666 struct prog_dst_register tmpcoord; 667 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 668 669 assert(unit < BRW_MAX_TEX_UNIT); 670 671 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { 672 struct prog_instruction *out; 673 struct prog_dst_register tmp0 = get_temp(c); 674 struct prog_src_register tmp0src = src_reg_from_dst(tmp0); 675 struct prog_dst_register tmp1 = get_temp(c); 676 struct prog_src_register tmp1src = src_reg_from_dst(tmp1); 677 struct prog_src_register src0 = inst->SrcReg[0]; 678 679 /* find longest component of coord vector and normalize it */ 680 tmpcoord = get_temp(c); 681 coord = src_reg_from_dst(tmpcoord); 682 683 /* tmpcoord = src0 (i.e.: coord = src0) */ 684 out = emit_op(c, OPCODE_MOV, 685 tmpcoord, 686 0, 687 src0, 688 src_undef(), 689 src_undef()); 690 out->SrcReg[0].Negate = NEGATE_NONE; 691 out->SrcReg[0].Abs = 1; 692 693 /* tmp0 = MAX(coord.X, coord.Y) */ 694 emit_op(c, OPCODE_MAX, 695 tmp0, 696 0, 697 src_swizzle1(coord, X), 698 src_swizzle1(coord, Y), 699 src_undef()); 700 701 /* tmp1 = MAX(tmp0, coord.Z) */ 702 emit_op(c, OPCODE_MAX, 703 tmp1, 704 0, 705 tmp0src, 706 src_swizzle1(coord, Z), 707 src_undef()); 708 709 /* tmp0 = 1 / tmp1 */ 710 emit_op(c, OPCODE_RCP, 711 dst_mask(tmp0, WRITEMASK_X), 712 0, 713 tmp1src, 714 src_undef(), 715 src_undef()); 716 717 /* tmpCoord = src0 * tmp0 */ 718 emit_op(c, OPCODE_MUL, 719 tmpcoord, 720 0, 721 src0, 722 src_swizzle1(tmp0src, SWIZZLE_X), 723 src_undef()); 724 725 release_temp(c, tmp0); 726 release_temp(c, tmp1); 727 } 728 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { 729 struct prog_src_register scale = 730 search_or_add_param5( c, 731 STATE_INTERNAL, 732 STATE_TEXRECT_SCALE, 733 unit, 734 0,0 ); 735 736 tmpcoord = get_temp(c); 737 738 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } 739 */ 740 emit_op(c, 741 OPCODE_MUL, 742 tmpcoord, 743 0, 744 inst->SrcReg[0], 745 src_swizzle(scale, 746 SWIZZLE_X, 747 SWIZZLE_Y, 748 SWIZZLE_ONE, 749 SWIZZLE_ONE), 750 src_undef()); 751 752 coord = src_reg_from_dst(tmpcoord); 753 } 754 else { 755 coord = inst->SrcReg[0]; 756 } 757 758 /* Need to emit YUV texture conversions by hand. Probably need to 759 * do this here - the alternative is in brw_wm_emit.c, but the 760 * conversion requires allocating a temporary variable which we 761 * don't have the facility to do that late in the compilation. 762 */ 763 if (c->key.yuvtex_mask & (1 << unit)) { 764 /* convert ycbcr to RGBA */ 765 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); 766 767 /* 768 CONST C0 = { -.5, -.0625, -.5, 1.164 } 769 CONST C1 = { 1.596, -0.813, 2.018, -.391 } 770 UYV = TEX ... 771 UYV.xyz = ADD UYV, C0 772 UYV.y = MUL UYV.y, C0.w 773 if (UV swaped) 774 RGB.xyz = MAD UYV.zzx, C1, UYV.y 775 else 776 RGB.xyz = MAD UYV.xxz, C1, UYV.y 777 RGB.y = MAD UYV.z, C1.w, RGB.y 778 */ 779 struct prog_dst_register dst = inst->DstReg; 780 struct prog_dst_register tmp = get_temp(c); 781 struct prog_src_register tmpsrc = src_reg_from_dst(tmp); 782 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); 783 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); 784 785 /* tmp = TEX ... 786 */ 787 emit_tex_op(c, 788 OPCODE_TEX, 789 tmp, 790 inst->SaturateMode, 791 unit, 792 inst->TexSrcTarget, 793 inst->TexShadow, 794 coord, 795 src_undef(), 796 src_undef()); 797 798 /* tmp.xyz = ADD TMP, C0 799 */ 800 emit_op(c, 801 OPCODE_ADD, 802 dst_mask(tmp, WRITEMASK_XYZ), 803 0, 804 tmpsrc, 805 C0, 806 src_undef()); 807 808 /* YUV.y = MUL YUV.y, C0.w 809 */ 810 811 emit_op(c, 812 OPCODE_MUL, 813 dst_mask(tmp, WRITEMASK_Y), 814 0, 815 tmpsrc, 816 src_swizzle1(C0, W), 817 src_undef()); 818 819 /* 820 * if (UV swaped) 821 * RGB.xyz = MAD YUV.zzx, C1, YUV.y 822 * else 823 * RGB.xyz = MAD YUV.xxz, C1, YUV.y 824 */ 825 826 emit_op(c, 827 OPCODE_MAD, 828 dst_mask(dst, WRITEMASK_XYZ), 829 0, 830 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), 831 C1, 832 src_swizzle1(tmpsrc, Y)); 833 834 /* RGB.y = MAD YUV.z, C1.w, RGB.y 835 */ 836 emit_op(c, 837 OPCODE_MAD, 838 dst_mask(dst, WRITEMASK_Y), 839 0, 840 src_swizzle1(tmpsrc, Z), 841 src_swizzle1(C1, W), 842 src_swizzle1(src_reg_from_dst(dst), Y)); 843 844 release_temp(c, tmp); 845 } 846 else { 847 /* ordinary RGBA tex instruction */ 848 emit_tex_op(c, 849 OPCODE_TEX, 850 inst->DstReg, 851 inst->SaturateMode, 852 unit, 853 inst->TexSrcTarget, 854 inst->TexShadow, 855 coord, 856 src_undef(), 857 src_undef()); 858 } 859 860 /* For GL_EXT_texture_swizzle: */ 861 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { 862 /* swizzle the result of the TEX instruction */ 863 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); 864 emit_op(c, OPCODE_SWZ, 865 inst->DstReg, 866 SATURATE_OFF, /* saturate already done above */ 867 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), 868 src_undef(), 869 src_undef()); 870 } 871 872 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || 873 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) 874 release_temp(c, tmpcoord); 875} 876 877 878/** 879 * Check if the given TXP instruction really needs the divide-by-W step. 880 */ 881static GLboolean projtex( struct brw_wm_compile *c, 882 const struct prog_instruction *inst ) 883{ 884 const struct prog_src_register src = inst->SrcReg[0]; 885 GLboolean retVal; 886 887 assert(inst->Opcode == OPCODE_TXP); 888 889 /* Only try to detect the simplest cases. Could detect (later) 890 * cases where we are trying to emit code like RCP {1.0}, MUL x, 891 * {1.0}, and so on. 892 * 893 * More complex cases than this typically only arise from 894 * user-provided fragment programs anyway: 895 */ 896 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) 897 retVal = GL_FALSE; /* ut2004 gun rendering !?! */ 898 else if (src.File == PROGRAM_INPUT && 899 GET_SWZ(src.Swizzle, W) == W && 900 (c->key.proj_attrib_mask & (1 << src.Index)) == 0) 901 retVal = GL_FALSE; 902 else 903 retVal = GL_TRUE; 904 905 return retVal; 906} 907 908 909/** 910 * Emit code for TXP. 911 */ 912static void precalc_txp( struct brw_wm_compile *c, 913 const struct prog_instruction *inst ) 914{ 915 struct prog_src_register src0 = inst->SrcReg[0]; 916 917 if (projtex(c, inst)) { 918 struct prog_dst_register tmp = get_temp(c); 919 struct prog_instruction tmp_inst; 920 921 /* tmp0.w = RCP inst.arg[0][3] 922 */ 923 emit_op(c, 924 OPCODE_RCP, 925 dst_mask(tmp, WRITEMASK_W), 926 0, 927 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), 928 src_undef(), 929 src_undef()); 930 931 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww 932 */ 933 emit_op(c, 934 OPCODE_MUL, 935 dst_mask(tmp, WRITEMASK_XYZ), 936 0, 937 src0, 938 src_swizzle1(src_reg_from_dst(tmp), W), 939 src_undef()); 940 941 /* dst = precalc(TEX tmp0) 942 */ 943 tmp_inst = *inst; 944 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); 945 precalc_tex(c, &tmp_inst); 946 947 release_temp(c, tmp); 948 } 949 else 950 { 951 /* dst = precalc(TEX src0) 952 */ 953 precalc_tex(c, inst); 954 } 955} 956 957 958 959static void emit_render_target_writes( struct brw_wm_compile *c ) 960{ 961 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 962 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); 963 struct prog_src_register outcolor; 964 GLuint i; 965 966 struct prog_instruction *inst, *last_inst; 967 968 /* The inst->Aux field is used for FB write target and the EOT marker */ 969 970 if (c->key.nr_color_regions > 1) { 971 for (i = 0 ; i < c->key.nr_color_regions; i++) { 972 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); 973 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 974 0, outcolor, payload_r0_depth, outdepth); 975 inst->Aux = INST_AUX_TARGET(i); 976 if (c->fp_fragcolor_emitted) { 977 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); 978 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 979 0, outcolor, payload_r0_depth, outdepth); 980 inst->Aux = INST_AUX_TARGET(i); 981 } 982 } 983 last_inst->Aux |= INST_AUX_EOT; 984 } 985 else { 986 /* if gl_FragData[0] is written, use it, else use gl_FragColor */ 987 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) 988 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); 989 else 990 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); 991 992 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 993 0, outcolor, payload_r0_depth, outdepth); 994 inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0); 995 } 996} 997 998 999 1000 1001/*********************************************************************** 1002 * Emit INTERP instructions ahead of first use of each attrib. 1003 */ 1004 1005static void validate_src_regs( struct brw_wm_compile *c, 1006 const struct prog_instruction *inst ) 1007{ 1008 GLuint nr_args = brw_wm_nr_args( inst->Opcode ); 1009 GLuint i; 1010 1011 for (i = 0; i < nr_args; i++) { 1012 if (inst->SrcReg[i].File == PROGRAM_INPUT) { 1013 GLuint idx = inst->SrcReg[i].Index; 1014 if (!(c->fp_interp_emitted & (1<<idx))) { 1015 emit_interp(c, idx); 1016 } 1017 } 1018 } 1019} 1020 1021static void validate_dst_regs( struct brw_wm_compile *c, 1022 const struct prog_instruction *inst ) 1023{ 1024 if (inst->DstReg.File == PROGRAM_OUTPUT) { 1025 GLuint idx = inst->DstReg.Index; 1026 if (idx == FRAG_RESULT_COLOR) 1027 c->fp_fragcolor_emitted = 1; 1028 } 1029} 1030 1031static void print_insns( const struct prog_instruction *insn, 1032 GLuint nr ) 1033{ 1034 GLuint i; 1035 for (i = 0; i < nr; i++, insn++) { 1036 _mesa_printf("%3d: ", i); 1037 if (insn->Opcode < MAX_OPCODE) 1038 _mesa_print_instruction(insn); 1039 else if (insn->Opcode < MAX_WM_OPCODE) { 1040 GLuint idx = insn->Opcode - MAX_OPCODE; 1041 1042 _mesa_print_alu_instruction(insn, 1043 wm_opcode_strings[idx], 1044 3); 1045 } 1046 else 1047 _mesa_printf("965 Opcode %d\n", insn->Opcode); 1048 } 1049} 1050 1051 1052/** 1053 * Initial pass for fragment program code generation. 1054 * This function is used by both the GLSL and non-GLSL paths. 1055 */ 1056void brw_wm_pass_fp( struct brw_wm_compile *c ) 1057{ 1058 struct brw_fragment_program *fp = c->fp; 1059 GLuint insn; 1060 1061 if (INTEL_DEBUG & DEBUG_WM) { 1062 _mesa_printf("pre-fp:\n"); 1063 _mesa_print_program(&fp->program.Base); 1064 _mesa_printf("\n"); 1065 } 1066 1067 c->pixel_xy = src_undef(); 1068 c->delta_xy = src_undef(); 1069 c->pixel_w = src_undef(); 1070 c->nr_fp_insns = 0; 1071 c->fp->tex_units_used = 0x0; 1072 1073 /* Emit preamble instructions. This is where special instructions such as 1074 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to 1075 * compute shader inputs from varying vars. 1076 */ 1077 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 1078 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 1079 validate_src_regs(c, inst); 1080 validate_dst_regs(c, inst); 1081 } 1082 1083 /* Loop over all instructions doing assorted simplifications and 1084 * transformations. 1085 */ 1086 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 1087 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 1088 struct prog_instruction *out; 1089 1090 /* Check for INPUT values, emit INTERP instructions where 1091 * necessary: 1092 */ 1093 1094 switch (inst->Opcode) { 1095 case OPCODE_SWZ: 1096 out = emit_insn(c, inst); 1097 out->Opcode = OPCODE_MOV; 1098 break; 1099 1100 case OPCODE_ABS: 1101 out = emit_insn(c, inst); 1102 out->Opcode = OPCODE_MOV; 1103 out->SrcReg[0].Negate = NEGATE_NONE; 1104 out->SrcReg[0].Abs = 1; 1105 break; 1106 1107 case OPCODE_SUB: 1108 out = emit_insn(c, inst); 1109 out->Opcode = OPCODE_ADD; 1110 out->SrcReg[1].Negate ^= NEGATE_XYZW; 1111 break; 1112 1113 case OPCODE_SCS: 1114 out = emit_insn(c, inst); 1115 /* This should probably be done in the parser. 1116 */ 1117 out->DstReg.WriteMask &= WRITEMASK_XY; 1118 break; 1119 1120 case OPCODE_DST: 1121 precalc_dst(c, inst); 1122 break; 1123 1124 case OPCODE_LIT: 1125 precalc_lit(c, inst); 1126 break; 1127 1128 case OPCODE_TEX: 1129 precalc_tex(c, inst); 1130 break; 1131 1132 case OPCODE_TXP: 1133 precalc_txp(c, inst); 1134 break; 1135 1136 case OPCODE_TXB: 1137 out = emit_insn(c, inst); 1138 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 1139 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); 1140 break; 1141 1142 case OPCODE_XPD: 1143 out = emit_insn(c, inst); 1144 /* This should probably be done in the parser. 1145 */ 1146 out->DstReg.WriteMask &= WRITEMASK_XYZ; 1147 break; 1148 1149 case OPCODE_KIL: 1150 out = emit_insn(c, inst); 1151 /* This should probably be done in the parser. 1152 */ 1153 out->DstReg.WriteMask = 0; 1154 break; 1155 case OPCODE_END: 1156 emit_render_target_writes(c); 1157 break; 1158 case OPCODE_PRINT: 1159 break; 1160 default: 1161 if (brw_wm_is_scalar_result(inst->Opcode)) 1162 emit_scalar_insn(c, inst); 1163 else 1164 emit_insn(c, inst); 1165 break; 1166 } 1167 } 1168 1169 if (INTEL_DEBUG & DEBUG_WM) { 1170 _mesa_printf("pass_fp:\n"); 1171 print_insns( c->prog_instructions, c->nr_fp_insns ); 1172 _mesa_printf("\n"); 1173 } 1174} 1175 1176