1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "main/glheader.h" 34#include "main/macros.h" 35#include "main/enums.h" 36#include "brw_context.h" 37#include "brw_wm.h" 38#include "brw_util.h" 39 40#include "program/prog_parameter.h" 41#include "program/prog_print.h" 42#include "program/prog_statevars.h" 43 44 45/** An invalid texture target */ 46#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS 47 48/** An invalid texture unit */ 49#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT 50 51#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS 52 53#define X 0 54#define Y 1 55#define Z 2 56#define W 3 57 58 59static const char *wm_opcode_strings[] = { 60 "PIXELXY", 61 "DELTAXY", 62 "PIXELW", 63 "LINTERP", 64 "PINTERP", 65 "CINTERP", 66 "WPOSXY", 67 "FB_WRITE", 68 "FRONTFACING", 69}; 70 71#if 0 72static const char *wm_file_strings[] = { 73 "PAYLOAD" 74}; 75#endif 76 77 78/*********************************************************************** 79 * Source regs 80 */ 81 82static struct prog_src_register src_reg(GLuint file, GLuint idx) 83{ 84 struct prog_src_register reg; 85 reg.File = file; 86 reg.Index = idx; 87 reg.Swizzle = SWIZZLE_NOOP; 88 reg.RelAddr = 0; 89 reg.Negate = NEGATE_NONE; 90 reg.Abs = 0; 91 reg.HasIndex2 = 0; 92 reg.RelAddr2 = 0; 93 reg.Index2 = 0; 94 return reg; 95} 96 97static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) 98{ 99 return src_reg(dst.File, dst.Index); 100} 101 102static struct prog_src_register src_undef( void ) 103{ 104 return src_reg(PROGRAM_UNDEFINED, 0); 105} 106 107static bool src_is_undef(struct prog_src_register src) 108{ 109 return src.File == PROGRAM_UNDEFINED; 110} 111 112static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) 113{ 114 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); 115 return reg; 116} 117 118static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) 119{ 120 return src_swizzle(reg, x, x, x, x); 121} 122 123static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) 124{ 125 reg.Swizzle = swizzle; 126 return reg; 127} 128 129 130/*********************************************************************** 131 * Dest regs 132 */ 133 134static struct prog_dst_register dst_reg(GLuint file, GLuint idx) 135{ 136 struct prog_dst_register reg; 137 reg.File = file; 138 reg.Index = idx; 139 reg.WriteMask = WRITEMASK_XYZW; 140 reg.RelAddr = 0; 141 reg.CondMask = COND_TR; 142 reg.CondSwizzle = 0; 143 reg.CondSrc = 0; 144 return reg; 145} 146 147static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) 148{ 149 reg.WriteMask &= mask; 150 return reg; 151} 152 153static struct prog_dst_register dst_undef( void ) 154{ 155 return dst_reg(PROGRAM_UNDEFINED, 0); 156} 157 158 159 160static struct prog_dst_register get_temp( struct brw_wm_compile *c ) 161{ 162 int bit = ffs( ~c->fp_temp ); 163 164 if (!bit) { 165 printf("%s: out of temporaries\n", __FILE__); 166 exit(1); 167 } 168 169 c->fp_temp |= 1<<(bit-1); 170 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); 171} 172 173 174static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) 175{ 176 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); 177} 178 179 180/*********************************************************************** 181 * Instructions 182 */ 183 184static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) 185{ 186 assert(c->nr_fp_insns < BRW_WM_MAX_INSN); 187 memset(&c->prog_instructions[c->nr_fp_insns], 0, 188 sizeof(*c->prog_instructions)); 189 return &c->prog_instructions[c->nr_fp_insns++]; 190} 191 192static struct prog_instruction *emit_insn(struct brw_wm_compile *c, 193 const struct prog_instruction *inst0) 194{ 195 struct prog_instruction *inst = get_fp_inst(c); 196 *inst = *inst0; 197 return inst; 198} 199 200static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, 201 GLuint op, 202 struct prog_dst_register dest, 203 GLuint saturate, 204 GLuint tex_src_unit, 205 GLuint tex_src_target, 206 GLuint tex_shadow, 207 struct prog_src_register src0, 208 struct prog_src_register src1, 209 struct prog_src_register src2 ) 210{ 211 struct prog_instruction *inst = get_fp_inst(c); 212 213 assert(tex_src_unit < BRW_MAX_TEX_UNIT || 214 tex_src_unit == TEX_UNIT_NONE); 215 assert(tex_src_target < NUM_TEXTURE_TARGETS || 216 tex_src_target == TEX_TARGET_NONE); 217 218 memset(inst, 0, sizeof(*inst)); 219 220 inst->Opcode = op; 221 inst->DstReg = dest; 222 inst->SaturateMode = saturate; 223 inst->TexSrcUnit = tex_src_unit; 224 inst->TexSrcTarget = tex_src_target; 225 inst->TexShadow = tex_shadow; 226 inst->SrcReg[0] = src0; 227 inst->SrcReg[1] = src1; 228 inst->SrcReg[2] = src2; 229 return inst; 230} 231 232 233static struct prog_instruction * emit_op(struct brw_wm_compile *c, 234 GLuint op, 235 struct prog_dst_register dest, 236 GLuint saturate, 237 struct prog_src_register src0, 238 struct prog_src_register src1, 239 struct prog_src_register src2 ) 240{ 241 return emit_tex_op(c, op, dest, saturate, 242 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ 243 src0, src1, src2); 244} 245 246 247/* Many Mesa opcodes produce the same value across all the result channels. 248 * We'd rather not have to support that splatting in the opcode implementations, 249 * and brw_wm_pass*.c wants to optimize them out by shuffling references around 250 * anyway. We can easily get both by emitting the opcode to one channel, and 251 * then MOVing it to the others, which brw_wm_pass*.c already understands. 252 */ 253static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, 254 const struct prog_instruction *inst0) 255{ 256 struct prog_instruction *inst; 257 unsigned int dst_chan; 258 unsigned int other_channel_mask; 259 260 if (inst0->DstReg.WriteMask == 0) 261 return NULL; 262 263 dst_chan = ffs(inst0->DstReg.WriteMask) - 1; 264 inst = get_fp_inst(c); 265 *inst = *inst0; 266 inst->DstReg.WriteMask = 1 << dst_chan; 267 268 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); 269 if (other_channel_mask != 0) { 270 inst = emit_op(c, 271 OPCODE_MOV, 272 dst_mask(inst0->DstReg, other_channel_mask), 273 0, 274 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), 275 src_undef(), 276 src_undef()); 277 } 278 return inst; 279} 280 281 282/*********************************************************************** 283 * Special instructions for interpolation and other tasks 284 */ 285 286static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) 287{ 288 if (src_is_undef(c->pixel_xy)) { 289 struct prog_dst_register pixel_xy = get_temp(c); 290 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 291 292 293 /* Emit the out calculations, and hold onto the results. Use 294 * two instructions as a temporary is required. 295 */ 296 /* pixel_xy.xy = PIXELXY payload[0]; 297 */ 298 emit_op(c, 299 WM_PIXELXY, 300 dst_mask(pixel_xy, WRITEMASK_XY), 301 0, 302 payload_r0_depth, 303 src_undef(), 304 src_undef()); 305 306 c->pixel_xy = src_reg_from_dst(pixel_xy); 307 } 308 309 return c->pixel_xy; 310} 311 312static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) 313{ 314 if (src_is_undef(c->delta_xy)) { 315 struct prog_dst_register delta_xy = get_temp(c); 316 struct prog_src_register pixel_xy = get_pixel_xy(c); 317 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 318 319 /* deltas.xy = DELTAXY pixel_xy, payload[0] 320 */ 321 emit_op(c, 322 WM_DELTAXY, 323 dst_mask(delta_xy, WRITEMASK_XY), 324 0, 325 pixel_xy, 326 payload_r0_depth, 327 src_undef()); 328 329 c->delta_xy = src_reg_from_dst(delta_xy); 330 } 331 332 return c->delta_xy; 333} 334 335static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) 336{ 337 /* This is called for producing 1/w in pre-gen6 interp. for gen6, 338 * the interp opcodes don't use this argument. But to keep the 339 * nr_args = 3 expectations of pinterp happy, just stuff delta_xy 340 * into the slot. 341 */ 342 if (c->func.brw->intel.gen >= 6) 343 return c->delta_xy; 344 345 if (src_is_undef(c->pixel_w)) { 346 struct prog_dst_register pixel_w = get_temp(c); 347 struct prog_src_register deltas = get_delta_xy(c); 348 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); 349 350 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x 351 */ 352 emit_op(c, 353 WM_PIXELW, 354 dst_mask(pixel_w, WRITEMASK_W), 355 0, 356 interp_wpos, 357 deltas, 358 src_undef()); 359 360 361 c->pixel_w = src_reg_from_dst(pixel_w); 362 } 363 364 return c->pixel_w; 365} 366 367static void emit_interp( struct brw_wm_compile *c, 368 GLuint idx ) 369{ 370 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); 371 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); 372 struct prog_src_register deltas; 373 374 deltas = get_delta_xy(c); 375 376 /* Need to use PINTERP on attributes which have been 377 * multiplied by 1/W in the SF program, and LINTERP on those 378 * which have not: 379 */ 380 switch (idx) { 381 case FRAG_ATTRIB_WPOS: 382 /* Have to treat wpos.xy specially: 383 */ 384 emit_op(c, 385 WM_WPOSXY, 386 dst_mask(dst, WRITEMASK_XY), 387 0, 388 get_pixel_xy(c), 389 src_undef(), 390 src_undef()); 391 392 dst = dst_mask(dst, WRITEMASK_ZW); 393 394 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw 395 */ 396 emit_op(c, 397 WM_LINTERP, 398 dst, 399 0, 400 interp, 401 deltas, 402 src_undef()); 403 break; 404 case FRAG_ATTRIB_COL0: 405 case FRAG_ATTRIB_COL1: 406 if (c->key.flat_shade) { 407 emit_op(c, 408 WM_CINTERP, 409 dst, 410 0, 411 interp, 412 src_undef(), 413 src_undef()); 414 } 415 else { 416 /* perspective-corrected color interpolation */ 417 emit_op(c, 418 WM_PINTERP, 419 dst, 420 0, 421 interp, 422 deltas, 423 get_pixel_w(c)); 424 } 425 break; 426 case FRAG_ATTRIB_FOGC: 427 /* Interpolate the fog coordinate */ 428 emit_op(c, 429 WM_PINTERP, 430 dst_mask(dst, WRITEMASK_X), 431 0, 432 interp, 433 deltas, 434 get_pixel_w(c)); 435 436 emit_op(c, 437 OPCODE_MOV, 438 dst_mask(dst, WRITEMASK_YZW), 439 0, 440 src_swizzle(interp, 441 SWIZZLE_ZERO, 442 SWIZZLE_ZERO, 443 SWIZZLE_ZERO, 444 SWIZZLE_ONE), 445 src_undef(), 446 src_undef()); 447 break; 448 449 case FRAG_ATTRIB_FACE: 450 emit_op(c, 451 WM_FRONTFACING, 452 dst_mask(dst, WRITEMASK_X), 453 0, 454 src_undef(), 455 src_undef(), 456 src_undef()); 457 break; 458 459 case FRAG_ATTRIB_PNTC: 460 /* XXX review/test this case */ 461 emit_op(c, 462 WM_PINTERP, 463 dst_mask(dst, WRITEMASK_XY), 464 0, 465 interp, 466 deltas, 467 get_pixel_w(c)); 468 469 emit_op(c, 470 OPCODE_MOV, 471 dst_mask(dst, WRITEMASK_ZW), 472 0, 473 src_swizzle(interp, 474 SWIZZLE_ZERO, 475 SWIZZLE_ZERO, 476 SWIZZLE_ZERO, 477 SWIZZLE_ONE), 478 src_undef(), 479 src_undef()); 480 break; 481 482 default: 483 emit_op(c, 484 WM_PINTERP, 485 dst, 486 0, 487 interp, 488 deltas, 489 get_pixel_w(c)); 490 break; 491 } 492 493 c->fp_interp_emitted |= 1<<idx; 494} 495 496/*********************************************************************** 497 * Hacks to extend the program parameter and constant lists. 498 */ 499 500/* Add the fog parameters to the parameter list of the original 501 * program, rather than creating a new list. Doesn't really do any 502 * harm and it's not as if the parameter handling isn't a big hack 503 * anyway. 504 */ 505static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 506 GLint s0, 507 GLint s1, 508 GLint s2, 509 GLint s3, 510 GLint s4) 511{ 512 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 513 gl_state_index tokens[STATE_LENGTH]; 514 GLuint idx; 515 tokens[0] = s0; 516 tokens[1] = s1; 517 tokens[2] = s2; 518 tokens[3] = s3; 519 tokens[4] = s4; 520 521 idx = _mesa_add_state_reference( paramList, tokens ); 522 523 return src_reg(PROGRAM_STATE_VAR, idx); 524} 525 526 527static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 528 GLfloat s0, 529 GLfloat s1, 530 GLfloat s2, 531 GLfloat s3) 532{ 533 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; 534 gl_constant_value values[4]; 535 GLuint idx; 536 GLuint swizzle; 537 struct prog_src_register reg; 538 539 values[0].f = s0; 540 values[1].f = s1; 541 values[2].f = s2; 542 values[3].f = s3; 543 544 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); 545 reg = src_reg(PROGRAM_STATE_VAR, idx); 546 reg.Swizzle = swizzle; 547 548 return reg; 549} 550 551 552 553/*********************************************************************** 554 * Expand various instructions here to simpler forms. 555 */ 556static void precalc_dst( struct brw_wm_compile *c, 557 const struct prog_instruction *inst ) 558{ 559 struct prog_src_register src0 = inst->SrcReg[0]; 560 struct prog_src_register src1 = inst->SrcReg[1]; 561 struct prog_dst_register dst = inst->DstReg; 562 struct prog_dst_register temp = get_temp(c); 563 564 if (dst.WriteMask & WRITEMASK_Y) { 565 /* dst.y = mul src0.y, src1.y 566 */ 567 emit_op(c, 568 OPCODE_MUL, 569 dst_mask(temp, WRITEMASK_Y), 570 inst->SaturateMode, 571 src0, 572 src1, 573 src_undef()); 574 } 575 576 if (dst.WriteMask & WRITEMASK_XZ) { 577 struct prog_instruction *swz; 578 GLuint z = GET_SWZ(src0.Swizzle, Z); 579 580 /* dst.xz = swz src0.1zzz 581 */ 582 swz = emit_op(c, 583 OPCODE_SWZ, 584 dst_mask(temp, WRITEMASK_XZ), 585 inst->SaturateMode, 586 src_swizzle(src0, SWIZZLE_ONE, z, z, z), 587 src_undef(), 588 src_undef()); 589 /* Avoid letting negation flag of src0 affect our 1 constant. */ 590 swz->SrcReg[0].Negate &= ~NEGATE_X; 591 } 592 if (dst.WriteMask & WRITEMASK_W) { 593 /* dst.w = mov src1.w 594 */ 595 emit_op(c, 596 OPCODE_MOV, 597 dst_mask(temp, WRITEMASK_W), 598 inst->SaturateMode, 599 src1, 600 src_undef(), 601 src_undef()); 602 } 603 604 /* This will get optimized out in general, but it ensures that we 605 * don't overwrite src operands in our channel-wise splitting 606 * above. See piglit fp-dst-aliasing-[12]. 607 */ 608 emit_op(c, 609 OPCODE_MOV, 610 dst, 611 0, 612 src_reg_from_dst(temp), 613 src_undef(), 614 src_undef()); 615 616 release_temp(c, temp); 617} 618 619 620static void precalc_lit( struct brw_wm_compile *c, 621 const struct prog_instruction *inst ) 622{ 623 struct prog_src_register src0 = inst->SrcReg[0]; 624 struct prog_dst_register dst = inst->DstReg; 625 626 if (dst.WriteMask & WRITEMASK_YZ) { 627 emit_op(c, 628 OPCODE_LIT, 629 dst_mask(dst, WRITEMASK_YZ), 630 inst->SaturateMode, 631 src0, 632 src_undef(), 633 src_undef()); 634 } 635 636 if (dst.WriteMask & WRITEMASK_XW) { 637 struct prog_instruction *swz; 638 639 /* dst.xw = swz src0.1111 640 */ 641 swz = emit_op(c, 642 OPCODE_SWZ, 643 dst_mask(dst, WRITEMASK_XW), 644 0, 645 src_swizzle1(src0, SWIZZLE_ONE), 646 src_undef(), 647 src_undef()); 648 /* Avoid letting the negation flag of src0 affect our 1 constant. */ 649 swz->SrcReg[0].Negate = NEGATE_NONE; 650 } 651} 652 653 654/** 655 * Some TEX instructions require extra code, cube map coordinate 656 * normalization, or coordinate scaling for RECT textures, etc. 657 * This function emits those extra instructions and the TEX 658 * instruction itself. 659 */ 660static void precalc_tex( struct brw_wm_compile *c, 661 const struct prog_instruction *inst ) 662{ 663 struct brw_compile *p = &c->func; 664 struct intel_context *intel = &p->brw->intel; 665 struct prog_src_register coord; 666 struct prog_dst_register tmpcoord = { 0 }; 667 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 668 struct prog_dst_register unswizzled_tmp; 669 670 /* If we are doing EXT_texture_swizzle, we need to write our result into a 671 * temporary, otherwise writemasking of the real dst could lose some of our 672 * channels. 673 */ 674 if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) { 675 unswizzled_tmp = get_temp(c); 676 } else { 677 unswizzled_tmp = inst->DstReg; 678 } 679 680 assert(unit < BRW_MAX_TEX_UNIT); 681 682 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { 683 struct prog_instruction *out; 684 struct prog_dst_register tmp0 = get_temp(c); 685 struct prog_src_register tmp0src = src_reg_from_dst(tmp0); 686 struct prog_dst_register tmp1 = get_temp(c); 687 struct prog_src_register tmp1src = src_reg_from_dst(tmp1); 688 struct prog_src_register src0 = inst->SrcReg[0]; 689 690 /* find longest component of coord vector and normalize it */ 691 tmpcoord = get_temp(c); 692 coord = src_reg_from_dst(tmpcoord); 693 694 /* tmpcoord = src0 (i.e.: coord = src0) */ 695 out = emit_op(c, OPCODE_MOV, 696 tmpcoord, 697 0, 698 src0, 699 src_undef(), 700 src_undef()); 701 out->SrcReg[0].Negate = NEGATE_NONE; 702 out->SrcReg[0].Abs = 1; 703 704 /* tmp0 = MAX(coord.X, coord.Y) */ 705 emit_op(c, OPCODE_MAX, 706 tmp0, 707 0, 708 src_swizzle1(coord, X), 709 src_swizzle1(coord, Y), 710 src_undef()); 711 712 /* tmp1 = MAX(tmp0, coord.Z) */ 713 emit_op(c, OPCODE_MAX, 714 tmp1, 715 0, 716 tmp0src, 717 src_swizzle1(coord, Z), 718 src_undef()); 719 720 /* tmp0 = 1 / tmp1 */ 721 emit_op(c, OPCODE_RCP, 722 dst_mask(tmp0, WRITEMASK_X), 723 0, 724 tmp1src, 725 src_undef(), 726 src_undef()); 727 728 /* tmpCoord = src0 * tmp0 */ 729 emit_op(c, OPCODE_MUL, 730 tmpcoord, 731 0, 732 src0, 733 src_swizzle1(tmp0src, SWIZZLE_X), 734 src_undef()); 735 736 release_temp(c, tmp0); 737 release_temp(c, tmp1); 738 } 739 else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) { 740 struct prog_src_register scale = 741 search_or_add_param5( c, 742 STATE_INTERNAL, 743 STATE_TEXRECT_SCALE, 744 unit, 745 0,0 ); 746 747 tmpcoord = get_temp(c); 748 749 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } 750 */ 751 emit_op(c, 752 OPCODE_MUL, 753 tmpcoord, 754 0, 755 inst->SrcReg[0], 756 src_swizzle(scale, 757 SWIZZLE_X, 758 SWIZZLE_Y, 759 SWIZZLE_ONE, 760 SWIZZLE_ONE), 761 src_undef()); 762 763 coord = src_reg_from_dst(tmpcoord); 764 } 765 else { 766 coord = inst->SrcReg[0]; 767 } 768 769 /* Need to emit YUV texture conversions by hand. Probably need to 770 * do this here - the alternative is in brw_wm_emit.c, but the 771 * conversion requires allocating a temporary variable which we 772 * don't have the facility to do that late in the compilation. 773 */ 774 if (c->key.tex.yuvtex_mask & (1 << unit)) { 775 /* convert ycbcr to RGBA */ 776 bool swap_uv = c->key.tex.yuvtex_swap_mask & (1 << unit); 777 778 /* 779 CONST C0 = { -.5, -.0625, -.5, 1.164 } 780 CONST C1 = { 1.596, -0.813, 2.018, -.391 } 781 UYV = TEX ... 782 UYV.xyz = ADD UYV, C0 783 UYV.y = MUL UYV.y, C0.w 784 if (UV swaped) 785 RGB.xyz = MAD UYV.zzx, C1, UYV.y 786 else 787 RGB.xyz = MAD UYV.xxz, C1, UYV.y 788 RGB.y = MAD UYV.z, C1.w, RGB.y 789 */ 790 struct prog_dst_register tmp = get_temp(c); 791 struct prog_src_register tmpsrc = src_reg_from_dst(tmp); 792 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); 793 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); 794 795 /* tmp = TEX ... 796 */ 797 emit_tex_op(c, 798 OPCODE_TEX, 799 tmp, 800 inst->SaturateMode, 801 unit, 802 inst->TexSrcTarget, 803 inst->TexShadow, 804 coord, 805 src_undef(), 806 src_undef()); 807 808 /* tmp.xyz = ADD TMP, C0 809 */ 810 emit_op(c, 811 OPCODE_ADD, 812 dst_mask(tmp, WRITEMASK_XYZ), 813 0, 814 tmpsrc, 815 C0, 816 src_undef()); 817 818 /* YUV.y = MUL YUV.y, C0.w 819 */ 820 821 emit_op(c, 822 OPCODE_MUL, 823 dst_mask(tmp, WRITEMASK_Y), 824 0, 825 tmpsrc, 826 src_swizzle1(C0, W), 827 src_undef()); 828 829 /* 830 * if (UV swaped) 831 * RGB.xyz = MAD YUV.zzx, C1, YUV.y 832 * else 833 * RGB.xyz = MAD YUV.xxz, C1, YUV.y 834 */ 835 836 emit_op(c, 837 OPCODE_MAD, 838 dst_mask(unswizzled_tmp, WRITEMASK_XYZ), 839 0, 840 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), 841 C1, 842 src_swizzle1(tmpsrc, Y)); 843 844 /* RGB.y = MAD YUV.z, C1.w, RGB.y 845 */ 846 emit_op(c, 847 OPCODE_MAD, 848 dst_mask(unswizzled_tmp, WRITEMASK_Y), 849 0, 850 src_swizzle1(tmpsrc, Z), 851 src_swizzle1(C1, W), 852 src_swizzle1(src_reg_from_dst(unswizzled_tmp), Y)); 853 854 release_temp(c, tmp); 855 } 856 else { 857 /* ordinary RGBA tex instruction */ 858 emit_tex_op(c, 859 OPCODE_TEX, 860 unswizzled_tmp, 861 inst->SaturateMode, 862 unit, 863 inst->TexSrcTarget, 864 inst->TexShadow, 865 coord, 866 src_undef(), 867 src_undef()); 868 } 869 870 /* For GL_EXT_texture_swizzle: */ 871 if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) { 872 /* swizzle the result of the TEX instruction */ 873 struct prog_src_register tmpsrc = src_reg_from_dst(unswizzled_tmp); 874 emit_op(c, OPCODE_SWZ, 875 inst->DstReg, 876 SATURATE_OFF, /* saturate already done above */ 877 src_swizzle4(tmpsrc, c->key.tex.swizzles[unit]), 878 src_undef(), 879 src_undef()); 880 } 881 882 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || 883 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) 884 release_temp(c, tmpcoord); 885} 886 887 888/** 889 * Check if the given TXP instruction really needs the divide-by-W step. 890 */ 891static bool 892projtex(struct brw_wm_compile *c, const struct prog_instruction *inst) 893{ 894 const struct prog_src_register src = inst->SrcReg[0]; 895 bool retVal; 896 897 assert(inst->Opcode == OPCODE_TXP); 898 899 /* Only try to detect the simplest cases. Could detect (later) 900 * cases where we are trying to emit code like RCP {1.0}, MUL x, 901 * {1.0}, and so on. 902 * 903 * More complex cases than this typically only arise from 904 * user-provided fragment programs anyway: 905 */ 906 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) 907 retVal = false; /* ut2004 gun rendering !?! */ 908 else if (src.File == PROGRAM_INPUT && 909 GET_SWZ(src.Swizzle, W) == W && 910 (c->key.proj_attrib_mask & (1 << src.Index)) == 0) 911 retVal = false; 912 else 913 retVal = true; 914 915 return retVal; 916} 917 918 919/** 920 * Emit code for TXP. 921 */ 922static void precalc_txp( struct brw_wm_compile *c, 923 const struct prog_instruction *inst ) 924{ 925 struct prog_src_register src0 = inst->SrcReg[0]; 926 927 if (projtex(c, inst)) { 928 struct prog_dst_register tmp = get_temp(c); 929 struct prog_instruction tmp_inst; 930 931 /* tmp0.w = RCP inst.arg[0][3] 932 */ 933 emit_op(c, 934 OPCODE_RCP, 935 dst_mask(tmp, WRITEMASK_W), 936 0, 937 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), 938 src_undef(), 939 src_undef()); 940 941 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww 942 */ 943 emit_op(c, 944 OPCODE_MUL, 945 dst_mask(tmp, WRITEMASK_XYZ), 946 0, 947 src0, 948 src_swizzle1(src_reg_from_dst(tmp), W), 949 src_undef()); 950 951 /* dst = precalc(TEX tmp0) 952 */ 953 tmp_inst = *inst; 954 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); 955 precalc_tex(c, &tmp_inst); 956 957 release_temp(c, tmp); 958 } 959 else 960 { 961 /* dst = precalc(TEX src0) 962 */ 963 precalc_tex(c, inst); 964 } 965} 966 967 968 969static void emit_render_target_writes( struct brw_wm_compile *c ) 970{ 971 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 972 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); 973 struct prog_src_register outcolor; 974 GLuint i; 975 976 struct prog_instruction *inst = NULL; 977 978 /* The inst->Aux field is used for FB write target and the EOT marker */ 979 980 for (i = 0; i < c->key.nr_color_regions; i++) { 981 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) { 982 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); 983 } else { 984 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); 985 } 986 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 987 0, outcolor, payload_r0_depth, outdepth); 988 inst->Aux = INST_AUX_TARGET(i); 989 } 990 991 /* Mark the last FB write as final, or emit a dummy write if we had 992 * no render targets bound. 993 */ 994 if (c->key.nr_color_regions != 0) { 995 inst->Aux |= INST_AUX_EOT; 996 } else { 997 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 998 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR), 999 payload_r0_depth, outdepth); 1000 inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT; 1001 } 1002} 1003 1004 1005 1006 1007/*********************************************************************** 1008 * Emit INTERP instructions ahead of first use of each attrib. 1009 */ 1010 1011static void validate_src_regs( struct brw_wm_compile *c, 1012 const struct prog_instruction *inst ) 1013{ 1014 GLuint nr_args = brw_wm_nr_args( inst->Opcode ); 1015 GLuint i; 1016 1017 for (i = 0; i < nr_args; i++) { 1018 if (inst->SrcReg[i].File == PROGRAM_INPUT) { 1019 GLuint idx = inst->SrcReg[i].Index; 1020 if (!(c->fp_interp_emitted & (1<<idx))) { 1021 emit_interp(c, idx); 1022 } 1023 } 1024 } 1025} 1026 1027static void print_insns( const struct prog_instruction *insn, 1028 GLuint nr ) 1029{ 1030 GLuint i; 1031 for (i = 0; i < nr; i++, insn++) { 1032 printf("%3d: ", i); 1033 if (insn->Opcode < MAX_OPCODE) 1034 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL); 1035 else if (insn->Opcode < MAX_WM_OPCODE) { 1036 GLuint idx = insn->Opcode - MAX_OPCODE; 1037 1038 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx], 1039 3, PROG_PRINT_DEBUG, NULL); 1040 } 1041 else 1042 printf("965 Opcode %d\n", insn->Opcode); 1043 } 1044} 1045 1046 1047/** 1048 * Initial pass for fragment program code generation. 1049 * This function is used by both the GLSL and non-GLSL paths. 1050 */ 1051void brw_wm_pass_fp( struct brw_wm_compile *c ) 1052{ 1053 struct intel_context *intel = &c->func.brw->intel; 1054 struct brw_fragment_program *fp = c->fp; 1055 GLuint insn; 1056 1057 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 1058 printf("pre-fp:\n"); 1059 _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG, 1060 true); 1061 printf("\n"); 1062 } 1063 1064 c->pixel_xy = src_undef(); 1065 if (intel->gen >= 6) { 1066 /* The interpolation deltas come in as the perspective pixel 1067 * location barycentric params. 1068 */ 1069 c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); 1070 } else { 1071 c->delta_xy = src_undef(); 1072 } 1073 c->pixel_w = src_undef(); 1074 c->nr_fp_insns = 0; 1075 1076 /* Emit preamble instructions. This is where special instructions such as 1077 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to 1078 * compute shader inputs from varying vars. 1079 */ 1080 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 1081 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 1082 validate_src_regs(c, inst); 1083 } 1084 1085 /* Loop over all instructions doing assorted simplifications and 1086 * transformations. 1087 */ 1088 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { 1089 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; 1090 struct prog_instruction *out; 1091 1092 /* Check for INPUT values, emit INTERP instructions where 1093 * necessary: 1094 */ 1095 1096 switch (inst->Opcode) { 1097 case OPCODE_SWZ: 1098 out = emit_insn(c, inst); 1099 out->Opcode = OPCODE_MOV; 1100 break; 1101 1102 case OPCODE_ABS: 1103 out = emit_insn(c, inst); 1104 out->Opcode = OPCODE_MOV; 1105 out->SrcReg[0].Negate = NEGATE_NONE; 1106 out->SrcReg[0].Abs = 1; 1107 break; 1108 1109 case OPCODE_SUB: 1110 out = emit_insn(c, inst); 1111 out->Opcode = OPCODE_ADD; 1112 out->SrcReg[1].Negate ^= NEGATE_XYZW; 1113 break; 1114 1115 case OPCODE_SCS: 1116 out = emit_insn(c, inst); 1117 /* This should probably be done in the parser. 1118 */ 1119 out->DstReg.WriteMask &= WRITEMASK_XY; 1120 break; 1121 1122 case OPCODE_DST: 1123 precalc_dst(c, inst); 1124 break; 1125 1126 case OPCODE_LIT: 1127 precalc_lit(c, inst); 1128 break; 1129 1130 case OPCODE_RSQ: 1131 out = emit_scalar_insn(c, inst); 1132 out->SrcReg[0].Abs = true; 1133 break; 1134 1135 case OPCODE_TEX: 1136 precalc_tex(c, inst); 1137 break; 1138 1139 case OPCODE_TXP: 1140 precalc_txp(c, inst); 1141 break; 1142 1143 case OPCODE_TXB: 1144 out = emit_insn(c, inst); 1145 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; 1146 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); 1147 break; 1148 1149 case OPCODE_XPD: 1150 out = emit_insn(c, inst); 1151 /* This should probably be done in the parser. 1152 */ 1153 out->DstReg.WriteMask &= WRITEMASK_XYZ; 1154 break; 1155 1156 case OPCODE_KIL: 1157 out = emit_insn(c, inst); 1158 /* This should probably be done in the parser. 1159 */ 1160 out->DstReg.WriteMask = 0; 1161 break; 1162 case OPCODE_END: 1163 emit_render_target_writes(c); 1164 break; 1165 case OPCODE_PRINT: 1166 break; 1167 default: 1168 if (brw_wm_is_scalar_result(inst->Opcode)) 1169 emit_scalar_insn(c, inst); 1170 else 1171 emit_insn(c, inst); 1172 break; 1173 } 1174 } 1175 1176 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 1177 printf("pass_fp:\n"); 1178 print_insns( c->prog_instructions, c->nr_fp_insns ); 1179 printf("\n"); 1180 } 1181} 1182 1183