/* brw_wm_fp.c revision f147599ef4b0d14c25a7e0d3f9f1c9b0229bb6fc */
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "main/glheader.h" 34#include "main/macros.h" 35#include "main/enums.h" 36#include "brw_context.h" 37#include "brw_wm.h" 38#include "brw_util.h" 39 40#include "program/prog_parameter.h" 41#include "program/prog_print.h" 42#include "program/prog_statevars.h" 43 44 45/** An invalid texture target */ 46#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS 47 48/** An invalid texture unit */ 49#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT 50 51#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS 52 53#define X 0 54#define Y 1 55#define Z 2 56#define W 3 57 58 59static const char *wm_opcode_strings[] = { 60 "PIXELXY", 61 "DELTAXY", 62 "PIXELW", 63 "LINTERP", 64 "PINTERP", 65 "CINTERP", 66 "WPOSXY", 67 "FB_WRITE", 68 "FRONTFACING", 69}; 70 71#if 0 72static const char *wm_file_strings[] = { 73 "PAYLOAD" 74}; 75#endif 76 77 78/*********************************************************************** 79 * Source regs 80 */ 81 82static struct prog_src_register src_reg(GLuint file, GLuint idx) 83{ 84 struct prog_src_register reg; 85 reg.File = file; 86 reg.Index = idx; 87 reg.Swizzle = SWIZZLE_NOOP; 88 reg.RelAddr = 0; 89 reg.Negate = NEGATE_NONE; 90 reg.Abs = 0; 91 reg.HasIndex2 = 0; 92 reg.RelAddr2 = 0; 93 reg.Index2 = 0; 94 return reg; 95} 96 97static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) 98{ 99 return src_reg(dst.File, dst.Index); 100} 101 102static struct prog_src_register src_undef( void ) 103{ 104 return src_reg(PROGRAM_UNDEFINED, 0); 105} 106 107static GLboolean src_is_undef(struct prog_src_register src) 108{ 109 return src.File == PROGRAM_UNDEFINED; 110} 111 112static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) 113{ 114 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); 115 return reg; 116} 117 118static struct prog_src_register src_swizzle1( 
struct prog_src_register reg, int x ) 119{ 120 return src_swizzle(reg, x, x, x, x); 121} 122 123static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) 124{ 125 reg.Swizzle = swizzle; 126 return reg; 127} 128 129 130/*********************************************************************** 131 * Dest regs 132 */ 133 134static struct prog_dst_register dst_reg(GLuint file, GLuint idx) 135{ 136 struct prog_dst_register reg; 137 reg.File = file; 138 reg.Index = idx; 139 reg.WriteMask = WRITEMASK_XYZW; 140 reg.RelAddr = 0; 141 reg.CondMask = COND_TR; 142 reg.CondSwizzle = 0; 143 reg.CondSrc = 0; 144 return reg; 145} 146 147static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) 148{ 149 reg.WriteMask &= mask; 150 return reg; 151} 152 153static struct prog_dst_register dst_undef( void ) 154{ 155 return dst_reg(PROGRAM_UNDEFINED, 0); 156} 157 158 159 160static struct prog_dst_register get_temp( struct brw_wm_compile *c ) 161{ 162 int bit = _mesa_ffs( ~c->fp_temp ); 163 164 if (!bit) { 165 printf("%s: out of temporaries\n", __FILE__); 166 exit(1); 167 } 168 169 c->fp_temp |= 1<<(bit-1); 170 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); 171} 172 173 174static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) 175{ 176 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); 177} 178 179 180/*********************************************************************** 181 * Instructions 182 */ 183 184static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) 185{ 186 assert(c->nr_fp_insns < BRW_WM_MAX_INSN); 187 memset(&c->prog_instructions[c->nr_fp_insns], 0, 188 sizeof(*c->prog_instructions)); 189 return &c->prog_instructions[c->nr_fp_insns++]; 190} 191 192static struct prog_instruction *emit_insn(struct brw_wm_compile *c, 193 const struct prog_instruction *inst0) 194{ 195 struct prog_instruction *inst = get_fp_inst(c); 196 *inst = *inst0; 197 return inst; 198} 199 
200static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, 201 GLuint op, 202 struct prog_dst_register dest, 203 GLuint saturate, 204 GLuint tex_src_unit, 205 GLuint tex_src_target, 206 GLuint tex_shadow, 207 struct prog_src_register src0, 208 struct prog_src_register src1, 209 struct prog_src_register src2 ) 210{ 211 struct prog_instruction *inst = get_fp_inst(c); 212 213 assert(tex_src_unit < BRW_MAX_TEX_UNIT || 214 tex_src_unit == TEX_UNIT_NONE); 215 assert(tex_src_target < NUM_TEXTURE_TARGETS || 216 tex_src_target == TEX_TARGET_NONE); 217 218 /* update mask of which texture units are referenced by this program */ 219 if (tex_src_unit != TEX_UNIT_NONE) 220 c->fp->tex_units_used |= (1 << tex_src_unit); 221 222 memset(inst, 0, sizeof(*inst)); 223 224 inst->Opcode = op; 225 inst->DstReg = dest; 226 inst->SaturateMode = saturate; 227 inst->TexSrcUnit = tex_src_unit; 228 inst->TexSrcTarget = tex_src_target; 229 inst->TexShadow = tex_shadow; 230 inst->SrcReg[0] = src0; 231 inst->SrcReg[1] = src1; 232 inst->SrcReg[2] = src2; 233 return inst; 234} 235 236 237static struct prog_instruction * emit_op(struct brw_wm_compile *c, 238 GLuint op, 239 struct prog_dst_register dest, 240 GLuint saturate, 241 struct prog_src_register src0, 242 struct prog_src_register src1, 243 struct prog_src_register src2 ) 244{ 245 return emit_tex_op(c, op, dest, saturate, 246 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ 247 src0, src1, src2); 248} 249 250 251/* Many Mesa opcodes produce the same value across all the result channels. 252 * We'd rather not have to support that splatting in the opcode implementations, 253 * and brw_wm_pass*.c wants to optimize them out by shuffling references around 254 * anyway. We can easily get both by emitting the opcode to one channel, and 255 * then MOVing it to the others, which brw_wm_pass*.c already understands. 
256 */ 257static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, 258 const struct prog_instruction *inst0) 259{ 260 struct prog_instruction *inst; 261 unsigned int dst_chan; 262 unsigned int other_channel_mask; 263 264 if (inst0->DstReg.WriteMask == 0) 265 return NULL; 266 267 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; 268 inst = get_fp_inst(c); 269 *inst = *inst0; 270 inst->DstReg.WriteMask = 1 << dst_chan; 271 272 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); 273 if (other_channel_mask != 0) { 274 inst = emit_op(c, 275 OPCODE_MOV, 276 dst_mask(inst0->DstReg, other_channel_mask), 277 0, 278 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), 279 src_undef(), 280 src_undef()); 281 } 282 return inst; 283} 284 285 286/*********************************************************************** 287 * Special instructions for interpolation and other tasks 288 */ 289 290static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) 291{ 292 if (src_is_undef(c->pixel_xy)) { 293 struct prog_dst_register pixel_xy = get_temp(c); 294 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 295 296 297 /* Emit the out calculations, and hold onto the results. Use 298 * two instructions as a temporary is required. 
299 */ 300 /* pixel_xy.xy = PIXELXY payload[0]; 301 */ 302 emit_op(c, 303 WM_PIXELXY, 304 dst_mask(pixel_xy, WRITEMASK_XY), 305 0, 306 payload_r0_depth, 307 src_undef(), 308 src_undef()); 309 310 c->pixel_xy = src_reg_from_dst(pixel_xy); 311 } 312 313 return c->pixel_xy; 314} 315 316static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) 317{ 318 if (src_is_undef(c->delta_xy)) { 319 struct prog_dst_register delta_xy = get_temp(c); 320 struct prog_src_register pixel_xy = get_pixel_xy(c); 321 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 322 323 /* deltas.xy = DELTAXY pixel_xy, payload[0] 324 */ 325 emit_op(c, 326 WM_DELTAXY, 327 dst_mask(delta_xy, WRITEMASK_XY), 328 0, 329 pixel_xy, 330 payload_r0_depth, 331 src_undef()); 332 333 c->delta_xy = src_reg_from_dst(delta_xy); 334 } 335 336 return c->delta_xy; 337} 338 339static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) 340{ 341 /* This is called for producing 1/w in pre-gen6 interp. for gen6, 342 * the interp opcodes don't use this argument. But to keep the 343 * nr_args = 3 expectations of pinterp happy, just stuff delta_xy 344 * into the slot. 
345 */ 346 if (c->func.brw->intel.gen >= 6) 347 return c->delta_xy; 348 349 if (src_is_undef(c->pixel_w)) { 350 struct prog_dst_register pixel_w = get_temp(c); 351 struct prog_src_register deltas = get_delta_xy(c); 352 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); 353 354 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x 355 */ 356 emit_op(c, 357 WM_PIXELW, 358 dst_mask(pixel_w, WRITEMASK_W), 359 0, 360 interp_wpos, 361 deltas, 362 src_undef()); 363 364 365 c->pixel_w = src_reg_from_dst(pixel_w); 366 } 367 368 return c->pixel_w; 369} 370 371static void emit_interp( struct brw_wm_compile *c, 372 GLuint idx ) 373{ 374 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); 375 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); 376 struct prog_src_register deltas; 377 378 deltas = get_delta_xy(c); 379 380 /* Need to use PINTERP on attributes which have been 381 * multiplied by 1/W in the SF program, and LINTERP on those 382 * which have not: 383 */ 384 switch (idx) { 385 case FRAG_ATTRIB_WPOS: 386 /* Have to treat wpos.xy specially: 387 */ 388 emit_op(c, 389 WM_WPOSXY, 390 dst_mask(dst, WRITEMASK_XY), 391 0, 392 get_pixel_xy(c), 393 src_undef(), 394 src_undef()); 395 396 dst = dst_mask(dst, WRITEMASK_ZW); 397 398 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw 399 */ 400 emit_op(c, 401 WM_LINTERP, 402 dst, 403 0, 404 interp, 405 deltas, 406 src_undef()); 407 break; 408 case FRAG_ATTRIB_COL0: 409 case FRAG_ATTRIB_COL1: 410 if (c->key.flat_shade) { 411 emit_op(c, 412 WM_CINTERP, 413 dst, 414 0, 415 interp, 416 src_undef(), 417 src_undef()); 418 } 419 else { 420 /* perspective-corrected color interpolation */ 421 emit_op(c, 422 WM_PINTERP, 423 dst, 424 0, 425 interp, 426 deltas, 427 get_pixel_w(c)); 428 } 429 break; 430 case FRAG_ATTRIB_FOGC: 431 /* Interpolate the fog coordinate */ 432 emit_op(c, 433 WM_PINTERP, 434 dst_mask(dst, WRITEMASK_X), 435 0, 436 interp, 437 deltas, 438 
get_pixel_w(c)); 439 440 emit_op(c, 441 OPCODE_MOV, 442 dst_mask(dst, WRITEMASK_YZW), 443 0, 444 src_swizzle(interp, 445 SWIZZLE_ZERO, 446 SWIZZLE_ZERO, 447 SWIZZLE_ZERO, 448 SWIZZLE_ONE), 449 src_undef(), 450 src_undef()); 451 break; 452 453 case FRAG_ATTRIB_FACE: 454 emit_op(c, 455 WM_FRONTFACING, 456 dst_mask(dst, WRITEMASK_X), 457 0, 458 src_undef(), 459 src_undef(), 460 src_undef()); 461 break; 462 463 case FRAG_ATTRIB_PNTC: 464 /* XXX review/test this case */ 465 emit_op(c, 466 WM_PINTERP, 467 dst_mask(dst, WRITEMASK_XY), 468 0, 469 interp, 470 deltas, 471 get_pixel_w(c)); 472 473 emit_op(c, 474 OPCODE_MOV, 475 dst_mask(dst, WRITEMASK_ZW), 476 0, 477 src_swizzle(interp, 478 SWIZZLE_ZERO, 479 SWIZZLE_ZERO, 480 SWIZZLE_ZERO, 481 SWIZZLE_ONE), 482 src_undef(), 483 src_undef()); 484 break; 485 486 default: 487 emit_op(c, 488 WM_PINTERP, 489 dst, 490 0, 491 interp, 492 deltas, 493 get_pixel_w(c)); 494 break; 495 } 496 497 c->fp_interp_emitted |= 1<<idx; 498} 499 500/*********************************************************************** 501 * Hacks to extend the program parameter and constant lists. 502 */ 503 504/* Add the fog parameters to the parameter list of the original 505 * program, rather than creating a new list. Doesn't really do any 506 * harm and it's not as if the parameter handling isn't a big hack 507 * anyway. 
508 */ 509static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 510 GLint s0, 511 GLint s1, 512 GLint s2, 513 GLint s3, 514 GLint s4) 515{ 516 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 517 gl_state_index tokens[STATE_LENGTH]; 518 GLuint idx; 519 tokens[0] = s0; 520 tokens[1] = s1; 521 tokens[2] = s2; 522 tokens[3] = s3; 523 tokens[4] = s4; 524 525 idx = _mesa_add_state_reference( paramList, tokens ); 526 527 return src_reg(PROGRAM_STATE_VAR, idx); 528} 529 530 531static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 532 GLfloat s0, 533 GLfloat s1, 534 GLfloat s2, 535 GLfloat s3) 536{ 537 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 538 GLfloat values[4]; 539 GLuint idx; 540 GLuint swizzle; 541 struct prog_src_register reg; 542 543 values[0] = s0; 544 values[1] = s1; 545 values[2] = s2; 546 values[3] = s3; 547 548 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); 549 reg = src_reg(PROGRAM_STATE_VAR, idx); 550 reg.Swizzle = swizzle; 551 552 return reg; 553} 554 555 556 557/*********************************************************************** 558 * Expand various instructions here to simpler forms. 
559 */ 560static void precalc_dst( struct brw_wm_compile *c, 561 const struct prog_instruction *inst ) 562{ 563 struct prog_src_register src0 = inst->SrcReg[0]; 564 struct prog_src_register src1 = inst->SrcReg[1]; 565 struct prog_dst_register dst = inst->DstReg; 566 567 if (dst.WriteMask & WRITEMASK_Y) { 568 /* dst.y = mul src0.y, src1.y 569 */ 570 emit_op(c, 571 OPCODE_MUL, 572 dst_mask(dst, WRITEMASK_Y), 573 inst->SaturateMode, 574 src0, 575 src1, 576 src_undef()); 577 } 578 579 if (dst.WriteMask & WRITEMASK_XZ) { 580 struct prog_instruction *swz; 581 GLuint z = GET_SWZ(src0.Swizzle, Z); 582 583 /* dst.xz = swz src0.1zzz 584 */ 585 swz = emit_op(c, 586 OPCODE_SWZ, 587 dst_mask(dst, WRITEMASK_XZ), 588 inst->SaturateMode, 589 src_swizzle(src0, SWIZZLE_ONE, z, z, z), 590 src_undef(), 591 src_undef()); 592 /* Avoid letting negation flag of src0 affect our 1 constant. */ 593 swz->SrcReg[0].Negate &= ~NEGATE_X; 594 } 595 if (dst.WriteMask & WRITEMASK_W) { 596 /* dst.w = mov src1.w 597 */ 598 emit_op(c, 599 OPCODE_MOV, 600 dst_mask(dst, WRITEMASK_W), 601 inst->SaturateMode, 602 src1, 603 src_undef(), 604 src_undef()); 605 } 606} 607 608 609static void precalc_lit( struct brw_wm_compile *c, 610 const struct prog_instruction *inst ) 611{ 612 struct prog_src_register src0 = inst->SrcReg[0]; 613 struct prog_dst_register dst = inst->DstReg; 614 615 if (dst.WriteMask & WRITEMASK_XW) { 616 struct prog_instruction *swz; 617 618 /* dst.xw = swz src0.1111 619 */ 620 swz = emit_op(c, 621 OPCODE_SWZ, 622 dst_mask(dst, WRITEMASK_XW), 623 0, 624 src_swizzle1(src0, SWIZZLE_ONE), 625 src_undef(), 626 src_undef()); 627 /* Avoid letting the negation flag of src0 affect our 1 constant. 
*/ 628 swz->SrcReg[0].Negate = NEGATE_NONE; 629 } 630 631 if (dst.WriteMask & WRITEMASK_YZ) { 632 emit_op(c, 633 OPCODE_LIT, 634 dst_mask(dst, WRITEMASK_YZ), 635 inst->SaturateMode, 636 src0, 637 src_undef(), 638 src_undef()); 639 } 640} 641 642 643/** 644 * Some TEX instructions require extra code, cube map coordinate 645 * normalization, or coordinate scaling for RECT textures, etc. 646 * This function emits those extra instructions and the TEX 647 * instruction itself. 648 */ 649static void precalc_tex( struct brw_wm_compile *c, 650 const struct prog_instruction *inst ) 651{ 652 struct prog_src_register coord; 653 struct prog_dst_register tmpcoord = { 0 }; 654 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 655 656 assert(unit < BRW_MAX_TEX_UNIT); 657 658 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { 659 struct prog_instruction *out; 660 struct prog_dst_register tmp0 = get_temp(c); 661 struct prog_src_register tmp0src = src_reg_from_dst(tmp0); 662 struct prog_dst_register tmp1 = get_temp(c); 663 struct prog_src_register tmp1src = src_reg_from_dst(tmp1); 664 struct prog_src_register src0 = inst->SrcReg[0]; 665 666 /* find longest component of coord vector and normalize it */ 667 tmpcoord = get_temp(c); 668 coord = src_reg_from_dst(tmpcoord); 669 670 /* tmpcoord = src0 (i.e.: coord = src0) */ 671 out = emit_op(c, OPCODE_MOV, 672 tmpcoord, 673 0, 674 src0, 675 src_undef(), 676 src_undef()); 677 out->SrcReg[0].Negate = NEGATE_NONE; 678 out->SrcReg[0].Abs = 1; 679 680 /* tmp0 = MAX(coord.X, coord.Y) */ 681 emit_op(c, OPCODE_MAX, 682 tmp0, 683 0, 684 src_swizzle1(coord, X), 685 src_swizzle1(coord, Y), 686 src_undef()); 687 688 /* tmp1 = MAX(tmp0, coord.Z) */ 689 emit_op(c, OPCODE_MAX, 690 tmp1, 691 0, 692 tmp0src, 693 src_swizzle1(coord, Z), 694 src_undef()); 695 696 /* tmp0 = 1 / tmp1 */ 697 emit_op(c, OPCODE_RCP, 698 dst_mask(tmp0, WRITEMASK_X), 699 0, 700 tmp1src, 701 src_undef(), 702 src_undef()); 703 704 /* tmpCoord = src0 * tmp0 */ 705 
emit_op(c, OPCODE_MUL, 706 tmpcoord, 707 0, 708 src0, 709 src_swizzle1(tmp0src, SWIZZLE_X), 710 src_undef()); 711 712 release_temp(c, tmp0); 713 release_temp(c, tmp1); 714 } 715 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { 716 struct prog_src_register scale = 717 search_or_add_param5( c, 718 STATE_INTERNAL, 719 STATE_TEXRECT_SCALE, 720 unit, 721 0,0 ); 722 723 tmpcoord = get_temp(c); 724 725 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } 726 */ 727 emit_op(c, 728 OPCODE_MUL, 729 tmpcoord, 730 0, 731 inst->SrcReg[0], 732 src_swizzle(scale, 733 SWIZZLE_X, 734 SWIZZLE_Y, 735 SWIZZLE_ONE, 736 SWIZZLE_ONE), 737 src_undef()); 738 739 coord = src_reg_from_dst(tmpcoord); 740 } 741 else { 742 coord = inst->SrcReg[0]; 743 } 744 745 /* Need to emit YUV texture conversions by hand. Probably need to 746 * do this here - the alternative is in brw_wm_emit.c, but the 747 * conversion requires allocating a temporary variable which we 748 * don't have the facility to do that late in the compilation. 749 */ 750 if (c->key.yuvtex_mask & (1 << unit)) { 751 /* convert ycbcr to RGBA */ 752 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); 753 754 /* 755 CONST C0 = { -.5, -.0625, -.5, 1.164 } 756 CONST C1 = { 1.596, -0.813, 2.018, -.391 } 757 UYV = TEX ... 758 UYV.xyz = ADD UYV, C0 759 UYV.y = MUL UYV.y, C0.w 760 if (UV swaped) 761 RGB.xyz = MAD UYV.zzx, C1, UYV.y 762 else 763 RGB.xyz = MAD UYV.xxz, C1, UYV.y 764 RGB.y = MAD UYV.z, C1.w, RGB.y 765 */ 766 struct prog_dst_register dst = inst->DstReg; 767 struct prog_dst_register tmp = get_temp(c); 768 struct prog_src_register tmpsrc = src_reg_from_dst(tmp); 769 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); 770 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); 771 772 /* tmp = TEX ... 
773 */ 774 emit_tex_op(c, 775 OPCODE_TEX, 776 tmp, 777 inst->SaturateMode, 778 unit, 779 inst->TexSrcTarget, 780 inst->TexShadow, 781 coord, 782 src_undef(), 783 src_undef()); 784 785 /* tmp.xyz = ADD TMP, C0 786 */ 787 emit_op(c, 788 OPCODE_ADD, 789 dst_mask(tmp, WRITEMASK_XYZ), 790 0, 791 tmpsrc, 792 C0, 793 src_undef()); 794 795 /* YUV.y = MUL YUV.y, C0.w 796 */ 797 798 emit_op(c, 799 OPCODE_MUL, 800 dst_mask(tmp, WRITEMASK_Y), 801 0, 802 tmpsrc, 803 src_swizzle1(C0, W), 804 src_undef()); 805 806 /* 807 * if (UV swaped) 808 * RGB.xyz = MAD YUV.zzx, C1, YUV.y 809 * else 810 * RGB.xyz = MAD YUV.xxz, C1, YUV.y 811 */ 812 813 emit_op(c, 814 OPCODE_MAD, 815 dst_mask(dst, WRITEMASK_XYZ), 816 0, 817 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), 818 C1, 819 src_swizzle1(tmpsrc, Y)); 820 821 /* RGB.y = MAD YUV.z, C1.w, RGB.y 822 */ 823 emit_op(c, 824 OPCODE_MAD, 825 dst_mask(dst, WRITEMASK_Y), 826 0, 827 src_swizzle1(tmpsrc, Z), 828 src_swizzle1(C1, W), 829 src_swizzle1(src_reg_from_dst(dst), Y)); 830 831 release_temp(c, tmp); 832 } 833 else { 834 /* ordinary RGBA tex instruction */ 835 emit_tex_op(c, 836 OPCODE_TEX, 837 inst->DstReg, 838 inst->SaturateMode, 839 unit, 840 inst->TexSrcTarget, 841 inst->TexShadow, 842 coord, 843 src_undef(), 844 src_undef()); 845 } 846 847 /* For GL_EXT_texture_swizzle: */ 848 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { 849 /* swizzle the result of the TEX instruction */ 850 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); 851 emit_op(c, OPCODE_SWZ, 852 inst->DstReg, 853 SATURATE_OFF, /* saturate already done above */ 854 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), 855 src_undef(), 856 src_undef()); 857 } 858 859 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || 860 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) 861 release_temp(c, tmpcoord); 862} 863 864 865/** 866 * Check if the given TXP instruction really needs the divide-by-W step. 
867 */ 868static GLboolean projtex( struct brw_wm_compile *c, 869 const struct prog_instruction *inst ) 870{ 871 const struct prog_src_register src = inst->SrcReg[0]; 872 GLboolean retVal; 873 874 assert(inst->Opcode == OPCODE_TXP); 875 876 /* Only try to detect the simplest cases. Could detect (later) 877 * cases where we are trying to emit code like RCP {1.0}, MUL x, 878 * {1.0}, and so on. 879 * 880 * More complex cases than this typically only arise from 881 * user-provided fragment programs anyway: 882 */ 883 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) 884 retVal = GL_FALSE; /* ut2004 gun rendering !?! */ 885 else if (src.File == PROGRAM_INPUT && 886 GET_SWZ(src.Swizzle, W) == W && 887 (c->key.proj_attrib_mask & (1 << src.Index)) == 0) 888 retVal = GL_FALSE; 889 else 890 retVal = GL_TRUE; 891 892 return retVal; 893} 894 895 896/** 897 * Emit code for TXP. 898 */ 899static void precalc_txp( struct brw_wm_compile *c, 900 const struct prog_instruction *inst ) 901{ 902 struct prog_src_register src0 = inst->SrcReg[0]; 903 904 if (projtex(c, inst)) { 905 struct prog_dst_register tmp = get_temp(c); 906 struct prog_instruction tmp_inst; 907 908 /* tmp0.w = RCP inst.arg[0][3] 909 */ 910 emit_op(c, 911 OPCODE_RCP, 912 dst_mask(tmp, WRITEMASK_W), 913 0, 914 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), 915 src_undef(), 916 src_undef()); 917 918 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww 919 */ 920 emit_op(c, 921 OPCODE_MUL, 922 dst_mask(tmp, WRITEMASK_XYZ), 923 0, 924 src0, 925 src_swizzle1(src_reg_from_dst(tmp), W), 926 src_undef()); 927 928 /* dst = precalc(TEX tmp0) 929 */ 930 tmp_inst = *inst; 931 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); 932 precalc_tex(c, &tmp_inst); 933 934 release_temp(c, tmp); 935 } 936 else 937 { 938 /* dst = precalc(TEX src0) 939 */ 940 precalc_tex(c, inst); 941 } 942} 943 944 945 946static void emit_render_target_writes( struct brw_wm_compile *c ) 947{ 948 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 
949 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); 950 struct prog_src_register outcolor; 951 GLuint i; 952 953 struct prog_instruction *inst = NULL; 954 955 /* The inst->Aux field is used for FB write target and the EOT marker */ 956 957 for (i = 0; i < c->key.nr_color_regions; i++) { 958 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) { 959 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); 960 } else { 961 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); 962 } 963 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 964 0, outcolor, payload_r0_depth, outdepth); 965 inst->Aux = INST_AUX_TARGET(i); 966 } 967 968 /* Mark the last FB write as final, or emit a dummy write if we had 969 * no render targets bound. 970 */ 971 if (c->key.nr_color_regions != 0) { 972 inst->Aux |= INST_AUX_EOT; 973 } else { 974 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 975 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR), 976 payload_r0_depth, outdepth); 977 inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT; 978 } 979} 980 981 982 983 984/*********************************************************************** 985 * Emit INTERP instructions ahead of first use of each attrib. 
986 */ 987 988static void validate_src_regs( struct brw_wm_compile *c, 989 const struct prog_instruction *inst ) 990{ 991 GLuint nr_args = brw_wm_nr_args( inst->Opcode ); 992 GLuint i; 993 994 for (i = 0; i < nr_args; i++) { 995 if (inst->SrcReg[i].File == PROGRAM_INPUT) { 996 GLuint idx = inst->SrcReg[i].Index; 997 if (!(c->fp_interp_emitted & (1<<idx))) { 998 emit_interp(c, idx); 999 } 1000 } 1001 } 1002} 1003 1004static void print_insns( const struct prog_instruction *insn, 1005 GLuint nr ) 1006{ 1007 GLuint i; 1008 for (i = 0; i < nr; i++, insn++) { 1009 printf("%3d: ", i); 1010 if (insn->Opcode < MAX_OPCODE) 1011 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL); 1012 else if (insn->Opcode < MAX_WM_OPCODE) { 1013 GLuint idx = insn->Opcode - MAX_OPCODE; 1014 1015 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx], 1016 3, PROG_PRINT_DEBUG, NULL); 1017 } 1018 else 1019 printf("965 Opcode %d\n", insn->Opcode); 1020 } 1021} 1022 1023 1024/** 1025 * Initial pass for fragment program code generation. 1026 * This function is used by both the GLSL and non-GLSL paths. 1027 */ 1028void brw_wm_pass_fp( struct brw_wm_compile *c ) 1029{ 1030 struct intel_context *intel = &c->func.brw->intel; 1031 struct brw_fragment_program *fp = c->fp; 1032 GLuint insn; 1033 1034 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 1035 printf("pre-fp:\n"); 1036 _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG, 1037 GL_TRUE); 1038 printf("\n"); 1039 } 1040 1041 c->pixel_xy = src_undef(); 1042 if (intel->gen >= 6) { 1043 /* The interpolation deltas come in as the perspective pixel 1044 * location barycentric params. 1045 */ 1046 c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 1047 } else { 1048 c->delta_xy = src_undef(); 1049 } 1050 c->pixel_w = src_undef(); 1051 c->nr_fp_insns = 0; 1052 c->fp->tex_units_used = 0x0; 1053 1054 /* Emit preamble instructions. 
This is where special instructions such as 1055 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to 1056 * compute shader inputs from varying vars. 1057 */ 1058 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 1059 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 1060 validate_src_regs(c, inst); 1061 } 1062 1063 /* Loop over all instructions doing assorted simplifications and 1064 * transformations. 1065 */ 1066 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 1067 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 1068 struct prog_instruction *out; 1069 1070 /* Check for INPUT values, emit INTERP instructions where 1071 * necessary: 1072 */ 1073 1074 switch (inst->Opcode) { 1075 case OPCODE_SWZ: 1076 out = emit_insn(c, inst); 1077 out->Opcode = OPCODE_MOV; 1078 break; 1079 1080 case OPCODE_ABS: 1081 out = emit_insn(c, inst); 1082 out->Opcode = OPCODE_MOV; 1083 out->SrcReg[0].Negate = NEGATE_NONE; 1084 out->SrcReg[0].Abs = 1; 1085 break; 1086 1087 case OPCODE_SUB: 1088 out = emit_insn(c, inst); 1089 out->Opcode = OPCODE_ADD; 1090 out->SrcReg[1].Negate ^= NEGATE_XYZW; 1091 break; 1092 1093 case OPCODE_SCS: 1094 out = emit_insn(c, inst); 1095 /* This should probably be done in the parser. 
1096 */ 1097 out->DstReg.WriteMask &= WRITEMASK_XY; 1098 break; 1099 1100 case OPCODE_DST: 1101 precalc_dst(c, inst); 1102 break; 1103 1104 case OPCODE_LIT: 1105 precalc_lit(c, inst); 1106 break; 1107 1108 case OPCODE_RSQ: 1109 out = emit_scalar_insn(c, inst); 1110 out->SrcReg[0].Abs = GL_TRUE; 1111 break; 1112 1113 case OPCODE_TEX: 1114 precalc_tex(c, inst); 1115 break; 1116 1117 case OPCODE_TXP: 1118 precalc_txp(c, inst); 1119 break; 1120 1121 case OPCODE_TXB: 1122 out = emit_insn(c, inst); 1123 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 1124 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); 1125 break; 1126 1127 case OPCODE_XPD: 1128 out = emit_insn(c, inst); 1129 /* This should probably be done in the parser. 1130 */ 1131 out->DstReg.WriteMask &= WRITEMASK_XYZ; 1132 break; 1133 1134 case OPCODE_KIL: 1135 out = emit_insn(c, inst); 1136 /* This should probably be done in the parser. 1137 */ 1138 out->DstReg.WriteMask = 0; 1139 break; 1140 case OPCODE_END: 1141 emit_render_target_writes(c); 1142 break; 1143 case OPCODE_PRINT: 1144 break; 1145 default: 1146 if (brw_wm_is_scalar_result(inst->Opcode)) 1147 emit_scalar_insn(c, inst); 1148 else 1149 emit_insn(c, inst); 1150 break; 1151 } 1152 } 1153 1154 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 1155 printf("pass_fp:\n"); 1156 print_insns( c->prog_instructions, c->nr_fp_insns ); 1157 printf("\n"); 1158 } 1159} 1160 1161