brw_wm_fp.c revision 9d4b98eb9eadecc17cd1cda0074b420a39e74647
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "program/prog_parameter.h"
41#include "program/prog_print.h"
42#include "program/prog_statevars.h"
43
44
45/** An invalid texture target */
46#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
47
48/** An invalid texture unit */
49#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
50
51#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
52
53#define X    0
54#define Y    1
55#define Z    2
56#define W    3
57
58
/* Printable names for the driver-private WM opcodes.  print_insns()
 * indexes this table with (Opcode - MAX_OPCODE), so the entry order has
 * to match the numbering of the WM_* opcodes that follow MAX_OPCODE
 * (presumably declared in brw_wm.h -- confirm when adding an opcode).
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};
70
/* Compiled out: nothing in the visible part of this file refers to
 * wm_file_strings.
 */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78/***********************************************************************
79 * Source regs
80 */
81
82static struct prog_src_register src_reg(GLuint file, GLuint idx)
83{
84   struct prog_src_register reg;
85   reg.File = file;
86   reg.Index = idx;
87   reg.Swizzle = SWIZZLE_NOOP;
88   reg.RelAddr = 0;
89   reg.Negate = NEGATE_NONE;
90   reg.Abs = 0;
91   reg.HasIndex2 = 0;
92   reg.RelAddr2 = 0;
93   reg.Index2 = 0;
94   return reg;
95}
96
97static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98{
99   return src_reg(dst.File, dst.Index);
100}
101
102static struct prog_src_register src_undef( void )
103{
104   return src_reg(PROGRAM_UNDEFINED, 0);
105}
106
107static GLboolean src_is_undef(struct prog_src_register src)
108{
109   return src.File == PROGRAM_UNDEFINED;
110}
111
112static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113{
114   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115   return reg;
116}
117
118static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119{
120   return src_swizzle(reg, x, x, x, x);
121}
122
123static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124{
125   reg.Swizzle = swizzle;
126   return reg;
127}
128
129
130/***********************************************************************
131 * Dest regs
132 */
133
134static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135{
136   struct prog_dst_register reg;
137   reg.File = file;
138   reg.Index = idx;
139   reg.WriteMask = WRITEMASK_XYZW;
140   reg.RelAddr = 0;
141   reg.CondMask = COND_TR;
142   reg.CondSwizzle = 0;
143   reg.CondSrc = 0;
144   return reg;
145}
146
147static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148{
149   reg.WriteMask &= mask;
150   return reg;
151}
152
153static struct prog_dst_register dst_undef( void )
154{
155   return dst_reg(PROGRAM_UNDEFINED, 0);
156}
157
158
159
160static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161{
162   int bit = _mesa_ffs( ~c->fp_temp );
163
164   if (!bit) {
165      printf("%s: out of temporaries\n", __FILE__);
166      exit(1);
167   }
168
169   c->fp_temp |= 1<<(bit-1);
170   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171}
172
173
174static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175{
176   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177}
178
179
180/***********************************************************************
181 * Instructions
182 */
183
184static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185{
186   assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187   memset(&c->prog_instructions[c->nr_fp_insns], 0,
188	  sizeof(*c->prog_instructions));
189   return &c->prog_instructions[c->nr_fp_insns++];
190}
191
192static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193					const struct prog_instruction *inst0)
194{
195   struct prog_instruction *inst = get_fp_inst(c);
196   *inst = *inst0;
197   return inst;
198}
199
200static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201				       GLuint op,
202				       struct prog_dst_register dest,
203				       GLuint saturate,
204				       GLuint tex_src_unit,
205				       GLuint tex_src_target,
206				       GLuint tex_shadow,
207				       struct prog_src_register src0,
208				       struct prog_src_register src1,
209				       struct prog_src_register src2 )
210{
211   struct prog_instruction *inst = get_fp_inst(c);
212
213   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214          tex_src_unit == TEX_UNIT_NONE);
215   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216          tex_src_target == TEX_TARGET_NONE);
217
218   /* update mask of which texture units are referenced by this program */
219   if (tex_src_unit != TEX_UNIT_NONE)
220      c->fp->tex_units_used |= (1 << tex_src_unit);
221
222   memset(inst, 0, sizeof(*inst));
223
224   inst->Opcode = op;
225   inst->DstReg = dest;
226   inst->SaturateMode = saturate;
227   inst->TexSrcUnit = tex_src_unit;
228   inst->TexSrcTarget = tex_src_target;
229   inst->TexShadow = tex_shadow;
230   inst->SrcReg[0] = src0;
231   inst->SrcReg[1] = src1;
232   inst->SrcReg[2] = src2;
233   return inst;
234}
235
236
237static struct prog_instruction * emit_op(struct brw_wm_compile *c,
238				       GLuint op,
239				       struct prog_dst_register dest,
240				       GLuint saturate,
241				       struct prog_src_register src0,
242				       struct prog_src_register src1,
243				       struct prog_src_register src2 )
244{
245   return emit_tex_op(c, op, dest, saturate,
246                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
247                      src0, src1, src2);
248}
249
250
251/* Many Mesa opcodes produce the same value across all the result channels.
252 * We'd rather not have to support that splatting in the opcode implementations,
253 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
254 * anyway.  We can easily get both by emitting the opcode to one channel, and
255 * then MOVing it to the others, which brw_wm_pass*.c already understands.
256 */
257static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
258						 const struct prog_instruction *inst0)
259{
260   struct prog_instruction *inst;
261   unsigned int dst_chan;
262   unsigned int other_channel_mask;
263
264   if (inst0->DstReg.WriteMask == 0)
265      return NULL;
266
267   dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
268   inst = get_fp_inst(c);
269   *inst = *inst0;
270   inst->DstReg.WriteMask = 1 << dst_chan;
271
272   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
273   if (other_channel_mask != 0) {
274      inst = emit_op(c,
275		     OPCODE_MOV,
276		     dst_mask(inst0->DstReg, other_channel_mask),
277		     0,
278		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
279		     src_undef(),
280		     src_undef());
281   }
282   return inst;
283}
284
285
286/***********************************************************************
287 * Special instructions for interpolation and other tasks
288 */
289
290static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
291{
292   if (src_is_undef(c->pixel_xy)) {
293      struct prog_dst_register pixel_xy = get_temp(c);
294      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
295
296
297      /* Emit the out calculations, and hold onto the results.  Use
298       * two instructions as a temporary is required.
299       */
300      /* pixel_xy.xy = PIXELXY payload[0];
301       */
302      emit_op(c,
303	      WM_PIXELXY,
304	      dst_mask(pixel_xy, WRITEMASK_XY),
305	      0,
306	      payload_r0_depth,
307	      src_undef(),
308	      src_undef());
309
310      c->pixel_xy = src_reg_from_dst(pixel_xy);
311   }
312
313   return c->pixel_xy;
314}
315
316static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
317{
318   if (src_is_undef(c->delta_xy)) {
319      struct prog_dst_register delta_xy = get_temp(c);
320      struct prog_src_register pixel_xy = get_pixel_xy(c);
321      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
322
323      /* deltas.xy = DELTAXY pixel_xy, payload[0]
324       */
325      emit_op(c,
326	      WM_DELTAXY,
327	      dst_mask(delta_xy, WRITEMASK_XY),
328	      0,
329	      pixel_xy,
330	      payload_r0_depth,
331	      src_undef());
332
333      c->delta_xy = src_reg_from_dst(delta_xy);
334   }
335
336   return c->delta_xy;
337}
338
339static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
340{
341   /* This is called for producing 1/w in pre-gen6 interp.  for gen6,
342    * the interp opcodes don't use this argument.  But to keep the
343    * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
344    * into the slot.
345    */
346   if (c->func.brw->intel.gen >= 6)
347      return c->delta_xy;
348
349   if (src_is_undef(c->pixel_w)) {
350      struct prog_dst_register pixel_w = get_temp(c);
351      struct prog_src_register deltas = get_delta_xy(c);
352      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
353
354      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
355       */
356      emit_op(c,
357	      WM_PIXELW,
358	      dst_mask(pixel_w, WRITEMASK_W),
359	      0,
360	      interp_wpos,
361	      deltas,
362	      src_undef());
363
364
365      c->pixel_w = src_reg_from_dst(pixel_w);
366   }
367
368   return c->pixel_w;
369}
370
/**
 * Emit the instruction(s) that compute fragment input attribute \p idx
 * into PROGRAM_INPUT[idx] from PROGRAM_PAYLOAD[idx], then record in
 * c->fp_interp_emitted that the attribute has been set up.
 *
 * Note that get_pixel_xy()/get_pixel_w() appear as call arguments below:
 * they run before the enclosing emit_op(), so any setup instruction they
 * emit lands ahead of the interpolation instruction that consumes it.
 */
static void emit_interp( struct brw_wm_compile *c,
			 GLuint idx )
{
   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas;

   /* Fetched for every attribute, including those whose case below does
    * not use it.
    */
   deltas = get_delta_xy(c);

   /* Need to use PINTERP on attributes which have been
    * multiplied by 1/W in the SF program, and LINTERP on those
    * which have not:
    */
   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      /* Have to treat wpos.xy specially:
       */
      emit_op(c,
	      WM_WPOSXY,
	      dst_mask(dst, WRITEMASK_XY),
	      0,
	      get_pixel_xy(c),
	      src_undef(),
	      src_undef());

      /* The remaining channels (z, w) are linearly interpolated. */
      dst = dst_mask(dst, WRITEMASK_ZW);

      /* PROGRAM_INPUT.attr.zw = LINTERP payload.interp[attr], deltas
       */
      emit_op(c,
	      WM_LINTERP,
	      dst,
	      0,
	      interp,
	      deltas,
	      src_undef());
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
	 /* flat shading: constant interpolation, no deltas needed */
	 emit_op(c,
		 WM_CINTERP,
		 dst,
		 0,
		 interp,
		 src_undef(),
		 src_undef());
      }
      else {
	 /* perspective-corrected color interpolation */
	 emit_op(c,
		 WM_PINTERP,
		 dst,
		 0,
		 interp,
		 deltas,
		 get_pixel_w(c));
      }
      break;
   case FRAG_ATTRIB_FOGC:
      /* Interpolate the fog coordinate */
      emit_op(c,
	      WM_PINTERP,
	      dst_mask(dst, WRITEMASK_X),
	      0,
	      interp,
	      deltas,
	      get_pixel_w(c));

      /* Fill the other channels with constants: yzw = (0, 0, 1).  Every
       * selector is a constant, so no payload data is actually read.
       */
      emit_op(c,
	      OPCODE_MOV,
	      dst_mask(dst, WRITEMASK_YZW),
	      0,
	      src_swizzle(interp,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ONE),
	      src_undef(),
	      src_undef());
      break;

   case FRAG_ATTRIB_FACE:
      /* Front-facing flag goes to .x only; it takes no source operands. */
      emit_op(c,
              WM_FRONTFACING,
              dst_mask(dst, WRITEMASK_X),
              0,
              src_undef(),
              src_undef(),
              src_undef());
      break;

   case FRAG_ATTRIB_PNTC:
      /* XXX review/test this case */
      emit_op(c,
	      WM_PINTERP,
	      dst_mask(dst, WRITEMASK_XY),
	      0,
	      interp,
	      deltas,
	      get_pixel_w(c));

      /* Fill the other channels with constants: zw = (0, 1). */
      emit_op(c,
	      OPCODE_MOV,
	      dst_mask(dst, WRITEMASK_ZW),
	      0,
	      src_swizzle(interp,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ONE),
	      src_undef(),
	      src_undef());
      break;

   default:
      /* Every other attribute: perspective-correct, all four channels. */
      emit_op(c,
	      WM_PINTERP,
	      dst,
	      0,
	      interp,
	      deltas,
	      get_pixel_w(c));
      break;
   }

   /* NOTE(review): 1<<idx is a signed shift; confirm idx never reaches
    * the sign bit.
    */
   c->fp_interp_emitted |= 1<<idx;
}
499
500/***********************************************************************
501 * Hacks to extend the program parameter and constant lists.
502 */
503
504/* Add the fog parameters to the parameter list of the original
505 * program, rather than creating a new list.  Doesn't really do any
506 * harm and it's not as if the parameter handling isn't a big hack
507 * anyway.
508 */
509static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
510                                                     GLint s0,
511                                                     GLint s1,
512                                                     GLint s2,
513                                                     GLint s3,
514                                                     GLint s4)
515{
516   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
517   gl_state_index tokens[STATE_LENGTH];
518   GLuint idx;
519   tokens[0] = s0;
520   tokens[1] = s1;
521   tokens[2] = s2;
522   tokens[3] = s3;
523   tokens[4] = s4;
524
525   idx = _mesa_add_state_reference( paramList, tokens );
526
527   return src_reg(PROGRAM_STATE_VAR, idx);
528}
529
530
531static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
532						     GLfloat s0,
533						     GLfloat s1,
534						     GLfloat s2,
535						     GLfloat s3)
536{
537   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
538   gl_constant_value values[4];
539   GLuint idx;
540   GLuint swizzle;
541   struct prog_src_register reg;
542
543   values[0].f = s0;
544   values[1].f = s1;
545   values[2].f = s2;
546   values[3].f = s3;
547
548   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
549   reg = src_reg(PROGRAM_STATE_VAR, idx);
550   reg.Swizzle = swizzle;
551
552   return reg;
553}
554
555
556
557/***********************************************************************
558 * Expand various instructions here to simpler forms.
559 */
560static void precalc_dst( struct brw_wm_compile *c,
561			       const struct prog_instruction *inst )
562{
563   struct prog_src_register src0 = inst->SrcReg[0];
564   struct prog_src_register src1 = inst->SrcReg[1];
565   struct prog_dst_register dst = inst->DstReg;
566   struct prog_dst_register temp = get_temp(c);
567
568   if (dst.WriteMask & WRITEMASK_Y) {
569      /* dst.y = mul src0.y, src1.y
570       */
571      emit_op(c,
572	      OPCODE_MUL,
573	      dst_mask(temp, WRITEMASK_Y),
574	      inst->SaturateMode,
575	      src0,
576	      src1,
577	      src_undef());
578   }
579
580   if (dst.WriteMask & WRITEMASK_XZ) {
581      struct prog_instruction *swz;
582      GLuint z = GET_SWZ(src0.Swizzle, Z);
583
584      /* dst.xz = swz src0.1zzz
585       */
586      swz = emit_op(c,
587		    OPCODE_SWZ,
588		    dst_mask(temp, WRITEMASK_XZ),
589		    inst->SaturateMode,
590		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
591		    src_undef(),
592		    src_undef());
593      /* Avoid letting negation flag of src0 affect our 1 constant. */
594      swz->SrcReg[0].Negate &= ~NEGATE_X;
595   }
596   if (dst.WriteMask & WRITEMASK_W) {
597      /* dst.w = mov src1.w
598       */
599      emit_op(c,
600	      OPCODE_MOV,
601	      dst_mask(temp, WRITEMASK_W),
602	      inst->SaturateMode,
603	      src1,
604	      src_undef(),
605	      src_undef());
606   }
607
608   /* This will get optimized out in general, but it ensures that we
609    * don't overwrite src operands in our channel-wise splitting
610    * above.  See piglit fp-dst-aliasing-[12].
611    */
612   emit_op(c,
613	   OPCODE_MOV,
614	   dst,
615	   0,
616	   src_reg_from_dst(temp),
617	   src_undef(),
618	   src_undef());
619
620   release_temp(c, temp);
621}
622
623
624static void precalc_lit( struct brw_wm_compile *c,
625			 const struct prog_instruction *inst )
626{
627   struct prog_src_register src0 = inst->SrcReg[0];
628   struct prog_dst_register dst = inst->DstReg;
629
630   if (dst.WriteMask & WRITEMASK_YZ) {
631      emit_op(c,
632	      OPCODE_LIT,
633	      dst_mask(dst, WRITEMASK_YZ),
634	      inst->SaturateMode,
635	      src0,
636	      src_undef(),
637	      src_undef());
638   }
639
640   if (dst.WriteMask & WRITEMASK_XW) {
641      struct prog_instruction *swz;
642
643      /* dst.xw = swz src0.1111
644       */
645      swz = emit_op(c,
646		    OPCODE_SWZ,
647		    dst_mask(dst, WRITEMASK_XW),
648		    0,
649		    src_swizzle1(src0, SWIZZLE_ONE),
650		    src_undef(),
651		    src_undef());
652      /* Avoid letting the negation flag of src0 affect our 1 constant. */
653      swz->SrcReg[0].Negate = NEGATE_NONE;
654   }
655}
656
657
658/**
659 * Some TEX instructions require extra code, cube map coordinate
660 * normalization, or coordinate scaling for RECT textures, etc.
661 * This function emits those extra instructions and the TEX
662 * instruction itself.
663 */
664static void precalc_tex( struct brw_wm_compile *c,
665			 const struct prog_instruction *inst )
666{
667   struct brw_compile *p = &c->func;
668   struct intel_context *intel = &p->brw->intel;
669   struct prog_src_register coord;
670   struct prog_dst_register tmpcoord = { 0 };
671   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
672
673   assert(unit < BRW_MAX_TEX_UNIT);
674
675   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
676       struct prog_instruction *out;
677       struct prog_dst_register tmp0 = get_temp(c);
678       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
679       struct prog_dst_register tmp1 = get_temp(c);
680       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
681       struct prog_src_register src0 = inst->SrcReg[0];
682
683       /* find longest component of coord vector and normalize it */
684       tmpcoord = get_temp(c);
685       coord = src_reg_from_dst(tmpcoord);
686
687       /* tmpcoord = src0 (i.e.: coord = src0) */
688       out = emit_op(c, OPCODE_MOV,
689                     tmpcoord,
690                     0,
691                     src0,
692                     src_undef(),
693                     src_undef());
694       out->SrcReg[0].Negate = NEGATE_NONE;
695       out->SrcReg[0].Abs = 1;
696
697       /* tmp0 = MAX(coord.X, coord.Y) */
698       emit_op(c, OPCODE_MAX,
699               tmp0,
700               0,
701               src_swizzle1(coord, X),
702               src_swizzle1(coord, Y),
703               src_undef());
704
705       /* tmp1 = MAX(tmp0, coord.Z) */
706       emit_op(c, OPCODE_MAX,
707               tmp1,
708               0,
709               tmp0src,
710               src_swizzle1(coord, Z),
711               src_undef());
712
713       /* tmp0 = 1 / tmp1 */
714       emit_op(c, OPCODE_RCP,
715               dst_mask(tmp0, WRITEMASK_X),
716               0,
717               tmp1src,
718               src_undef(),
719               src_undef());
720
721       /* tmpCoord = src0 * tmp0 */
722       emit_op(c, OPCODE_MUL,
723               tmpcoord,
724               0,
725               src0,
726               src_swizzle1(tmp0src, SWIZZLE_X),
727               src_undef());
728
729       release_temp(c, tmp0);
730       release_temp(c, tmp1);
731   }
732   else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
733      struct prog_src_register scale =
734	 search_or_add_param5( c,
735			       STATE_INTERNAL,
736			       STATE_TEXRECT_SCALE,
737			       unit,
738			       0,0 );
739
740      tmpcoord = get_temp(c);
741
742      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
743       */
744      emit_op(c,
745	      OPCODE_MUL,
746	      tmpcoord,
747	      0,
748	      inst->SrcReg[0],
749	      src_swizzle(scale,
750			  SWIZZLE_X,
751			  SWIZZLE_Y,
752			  SWIZZLE_ONE,
753			  SWIZZLE_ONE),
754	      src_undef());
755
756      coord = src_reg_from_dst(tmpcoord);
757   }
758   else {
759      coord = inst->SrcReg[0];
760   }
761
762   /* Need to emit YUV texture conversions by hand.  Probably need to
763    * do this here - the alternative is in brw_wm_emit.c, but the
764    * conversion requires allocating a temporary variable which we
765    * don't have the facility to do that late in the compilation.
766    */
767   if (c->key.yuvtex_mask & (1 << unit)) {
768      /* convert ycbcr to RGBA */
769      GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
770
771      /*
772	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
773	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
774	 UYV     = TEX ...
775	 UYV.xyz = ADD UYV,     C0
776	 UYV.y   = MUL UYV.y,   C0.w
777 	 if (UV swaped)
778	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
779	 else
780	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
781	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
782      */
783      struct prog_dst_register dst = inst->DstReg;
784      struct prog_dst_register tmp = get_temp(c);
785      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
786      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
787      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
788
789      /* tmp     = TEX ...
790       */
791      emit_tex_op(c,
792                  OPCODE_TEX,
793                  tmp,
794                  inst->SaturateMode,
795                  unit,
796                  inst->TexSrcTarget,
797                  inst->TexShadow,
798                  coord,
799                  src_undef(),
800                  src_undef());
801
802      /* tmp.xyz =  ADD TMP, C0
803       */
804      emit_op(c,
805	      OPCODE_ADD,
806	      dst_mask(tmp, WRITEMASK_XYZ),
807	      0,
808	      tmpsrc,
809	      C0,
810	      src_undef());
811
812      /* YUV.y   = MUL YUV.y, C0.w
813       */
814
815      emit_op(c,
816	      OPCODE_MUL,
817	      dst_mask(tmp, WRITEMASK_Y),
818	      0,
819	      tmpsrc,
820	      src_swizzle1(C0, W),
821	      src_undef());
822
823      /*
824       * if (UV swaped)
825       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
826       * else
827       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
828       */
829
830      emit_op(c,
831	      OPCODE_MAD,
832	      dst_mask(dst, WRITEMASK_XYZ),
833	      0,
834	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
835	      C1,
836	      src_swizzle1(tmpsrc, Y));
837
838      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
839       */
840      emit_op(c,
841	      OPCODE_MAD,
842	      dst_mask(dst, WRITEMASK_Y),
843	      0,
844	      src_swizzle1(tmpsrc, Z),
845	      src_swizzle1(C1, W),
846	      src_swizzle1(src_reg_from_dst(dst), Y));
847
848      release_temp(c, tmp);
849   }
850   else {
851      /* ordinary RGBA tex instruction */
852      emit_tex_op(c,
853                  OPCODE_TEX,
854                  inst->DstReg,
855                  inst->SaturateMode,
856                  unit,
857                  inst->TexSrcTarget,
858                  inst->TexShadow,
859                  coord,
860                  src_undef(),
861                  src_undef());
862   }
863
864   /* For GL_EXT_texture_swizzle: */
865   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
866      /* swizzle the result of the TEX instruction */
867      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
868      emit_op(c, OPCODE_SWZ,
869              inst->DstReg,
870              SATURATE_OFF, /* saturate already done above */
871              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
872              src_undef(),
873              src_undef());
874   }
875
876   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
877       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
878      release_temp(c, tmpcoord);
879}
880
881
882/**
883 * Check if the given TXP instruction really needs the divide-by-W step.
884 */
885static GLboolean projtex( struct brw_wm_compile *c,
886			  const struct prog_instruction *inst )
887{
888   const struct prog_src_register src = inst->SrcReg[0];
889   GLboolean retVal;
890
891   assert(inst->Opcode == OPCODE_TXP);
892
893   /* Only try to detect the simplest cases.  Could detect (later)
894    * cases where we are trying to emit code like RCP {1.0}, MUL x,
895    * {1.0}, and so on.
896    *
897    * More complex cases than this typically only arise from
898    * user-provided fragment programs anyway:
899    */
900   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
901      retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
902   else if (src.File == PROGRAM_INPUT &&
903	    GET_SWZ(src.Swizzle, W) == W &&
904            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
905      retVal = GL_FALSE;
906   else
907      retVal = GL_TRUE;
908
909   return retVal;
910}
911
912
913/**
914 * Emit code for TXP.
915 */
916static void precalc_txp( struct brw_wm_compile *c,
917			       const struct prog_instruction *inst )
918{
919   struct prog_src_register src0 = inst->SrcReg[0];
920
921   if (projtex(c, inst)) {
922      struct prog_dst_register tmp = get_temp(c);
923      struct prog_instruction tmp_inst;
924
925      /* tmp0.w = RCP inst.arg[0][3]
926       */
927      emit_op(c,
928	      OPCODE_RCP,
929	      dst_mask(tmp, WRITEMASK_W),
930	      0,
931	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
932	      src_undef(),
933	      src_undef());
934
935      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
936       */
937      emit_op(c,
938	      OPCODE_MUL,
939	      dst_mask(tmp, WRITEMASK_XYZ),
940	      0,
941	      src0,
942	      src_swizzle1(src_reg_from_dst(tmp), W),
943	      src_undef());
944
945      /* dst = precalc(TEX tmp0)
946       */
947      tmp_inst = *inst;
948      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
949      precalc_tex(c, &tmp_inst);
950
951      release_temp(c, tmp);
952   }
953   else
954   {
955      /* dst = precalc(TEX src0)
956       */
957      precalc_tex(c, inst);
958   }
959}
960
961
962
963static void emit_render_target_writes( struct brw_wm_compile *c )
964{
965   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
966   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
967   struct prog_src_register outcolor;
968   GLuint i;
969
970   struct prog_instruction *inst = NULL;
971
972   /* The inst->Aux field is used for FB write target and the EOT marker */
973
974   for (i = 0; i < c->key.nr_color_regions; i++) {
975      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
976	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
977      } else {
978	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
979      }
980      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
981		     0, outcolor, payload_r0_depth, outdepth);
982      inst->Aux = INST_AUX_TARGET(i);
983   }
984
985   /* Mark the last FB write as final, or emit a dummy write if we had
986    * no render targets bound.
987    */
988   if (c->key.nr_color_regions != 0) {
989      inst->Aux |= INST_AUX_EOT;
990   } else {
991      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
992		     0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
993		     payload_r0_depth, outdepth);
994      inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
995   }
996}
997
998
999
1000
1001/***********************************************************************
1002 * Emit INTERP instructions ahead of first use of each attrib.
1003 */
1004
1005static void validate_src_regs( struct brw_wm_compile *c,
1006			       const struct prog_instruction *inst )
1007{
1008   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1009   GLuint i;
1010
1011   for (i = 0; i < nr_args; i++) {
1012      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1013	 GLuint idx = inst->SrcReg[i].Index;
1014	 if (!(c->fp_interp_emitted & (1<<idx))) {
1015	    emit_interp(c, idx);
1016	 }
1017      }
1018   }
1019}
1020
1021static void print_insns( const struct prog_instruction *insn,
1022			 GLuint nr )
1023{
1024   GLuint i;
1025   for (i = 0; i < nr; i++, insn++) {
1026      printf("%3d: ", i);
1027      if (insn->Opcode < MAX_OPCODE)
1028	 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1029      else if (insn->Opcode < MAX_WM_OPCODE) {
1030	 GLuint idx = insn->Opcode - MAX_OPCODE;
1031
1032	 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1033				      3, PROG_PRINT_DEBUG, NULL);
1034      }
1035      else
1036	 printf("965 Opcode %d\n", insn->Opcode);
1037   }
1038}
1039
1040
1041/**
1042 * Initial pass for fragment program code generation.
1043 * This function is used by both the GLSL and non-GLSL paths.
1044 */
1045void brw_wm_pass_fp( struct brw_wm_compile *c )
1046{
1047   struct intel_context *intel = &c->func.brw->intel;
1048   struct brw_fragment_program *fp = c->fp;
1049   GLuint insn;
1050
1051   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1052      printf("pre-fp:\n");
1053      _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1054			       GL_TRUE);
1055      printf("\n");
1056   }
1057
1058   c->pixel_xy = src_undef();
1059   if (intel->gen >= 6) {
1060      /* The interpolation deltas come in as the perspective pixel
1061       * location barycentric params.
1062       */
1063      c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1064   } else {
1065      c->delta_xy = src_undef();
1066   }
1067   c->pixel_w = src_undef();
1068   c->nr_fp_insns = 0;
1069   c->fp->tex_units_used = 0x0;
1070
1071   /* Emit preamble instructions.  This is where special instructions such as
1072    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1073    * compute shader inputs from varying vars.
1074    */
1075   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1076      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1077      validate_src_regs(c, inst);
1078   }
1079
1080   /* Loop over all instructions doing assorted simplifications and
1081    * transformations.
1082    */
1083   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1084      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1085      struct prog_instruction *out;
1086
1087      /* Check for INPUT values, emit INTERP instructions where
1088       * necessary:
1089       */
1090
1091      switch (inst->Opcode) {
1092      case OPCODE_SWZ:
1093	 out = emit_insn(c, inst);
1094	 out->Opcode = OPCODE_MOV;
1095	 break;
1096
1097      case OPCODE_ABS:
1098	 out = emit_insn(c, inst);
1099	 out->Opcode = OPCODE_MOV;
1100	 out->SrcReg[0].Negate = NEGATE_NONE;
1101	 out->SrcReg[0].Abs = 1;
1102	 break;
1103
1104      case OPCODE_SUB:
1105	 out = emit_insn(c, inst);
1106	 out->Opcode = OPCODE_ADD;
1107	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1108	 break;
1109
1110      case OPCODE_SCS:
1111	 out = emit_insn(c, inst);
1112	 /* This should probably be done in the parser.
1113	  */
1114	 out->DstReg.WriteMask &= WRITEMASK_XY;
1115	 break;
1116
1117      case OPCODE_DST:
1118	 precalc_dst(c, inst);
1119	 break;
1120
1121      case OPCODE_LIT:
1122	 precalc_lit(c, inst);
1123	 break;
1124
1125      case OPCODE_RSQ:
1126	 out = emit_scalar_insn(c, inst);
1127	 out->SrcReg[0].Abs = GL_TRUE;
1128	 break;
1129
1130      case OPCODE_TEX:
1131	 precalc_tex(c, inst);
1132	 break;
1133
1134      case OPCODE_TXP:
1135	 precalc_txp(c, inst);
1136	 break;
1137
1138      case OPCODE_TXB:
1139	 out = emit_insn(c, inst);
1140	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1141         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1142	 break;
1143
1144      case OPCODE_XPD:
1145	 out = emit_insn(c, inst);
1146	 /* This should probably be done in the parser.
1147	  */
1148	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1149	 break;
1150
1151      case OPCODE_KIL:
1152	 out = emit_insn(c, inst);
1153	 /* This should probably be done in the parser.
1154	  */
1155	 out->DstReg.WriteMask = 0;
1156	 break;
1157      case OPCODE_END:
1158	 emit_render_target_writes(c);
1159	 break;
1160      case OPCODE_PRINT:
1161	 break;
1162      default:
1163	 if (brw_wm_is_scalar_result(inst->Opcode))
1164	    emit_scalar_insn(c, inst);
1165	 else
1166	    emit_insn(c, inst);
1167	 break;
1168      }
1169   }
1170
1171   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1172      printf("pass_fp:\n");
1173      print_insns( c->prog_instructions, c->nr_fp_insns );
1174      printf("\n");
1175   }
1176}
1177
1178