1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "program/prog_parameter.h"
41#include "program/prog_print.h"
42#include "program/prog_statevars.h"
43
44
45/** An invalid texture target */
46#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
47
48/** An invalid texture unit */
49#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
50
51#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
52
53#define X    0
54#define Y    1
55#define Z    2
56#define W    3
57
58
/* Printable names for the driver-private WM opcodes.  print_insns()
 * looks a name up with (Opcode - MAX_OPCODE), so the entry order is
 * assumed to follow the WM_* opcode numbering, which is defined outside
 * this file.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};
70
#if 0
/* Compiled out: nothing in the visible part of this file refers to
 * this table.
 */
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78/***********************************************************************
79 * Source regs
80 */
81
82static struct prog_src_register src_reg(GLuint file, GLuint idx)
83{
84   struct prog_src_register reg;
85   reg.File = file;
86   reg.Index = idx;
87   reg.Swizzle = SWIZZLE_NOOP;
88   reg.RelAddr = 0;
89   reg.Negate = NEGATE_NONE;
90   reg.Abs = 0;
91   reg.HasIndex2 = 0;
92   reg.RelAddr2 = 0;
93   reg.Index2 = 0;
94   return reg;
95}
96
97static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98{
99   return src_reg(dst.File, dst.Index);
100}
101
102static struct prog_src_register src_undef( void )
103{
104   return src_reg(PROGRAM_UNDEFINED, 0);
105}
106
107static bool src_is_undef(struct prog_src_register src)
108{
109   return src.File == PROGRAM_UNDEFINED;
110}
111
112static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113{
114   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115   return reg;
116}
117
118static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119{
120   return src_swizzle(reg, x, x, x, x);
121}
122
123static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124{
125   reg.Swizzle = swizzle;
126   return reg;
127}
128
129
130/***********************************************************************
131 * Dest regs
132 */
133
134static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135{
136   struct prog_dst_register reg;
137   reg.File = file;
138   reg.Index = idx;
139   reg.WriteMask = WRITEMASK_XYZW;
140   reg.RelAddr = 0;
141   reg.CondMask = COND_TR;
142   reg.CondSwizzle = 0;
143   reg.CondSrc = 0;
144   return reg;
145}
146
147static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148{
149   reg.WriteMask &= mask;
150   return reg;
151}
152
153static struct prog_dst_register dst_undef( void )
154{
155   return dst_reg(PROGRAM_UNDEFINED, 0);
156}
157
158
159
160static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161{
162   int bit = ffs( ~c->fp_temp );
163
164   if (!bit) {
165      printf("%s: out of temporaries\n", __FILE__);
166      exit(1);
167   }
168
169   c->fp_temp |= 1<<(bit-1);
170   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171}
172
173
174static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175{
176   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177}
178
179
180/***********************************************************************
181 * Instructions
182 */
183
184static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185{
186   assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187   memset(&c->prog_instructions[c->nr_fp_insns], 0,
188	  sizeof(*c->prog_instructions));
189   return &c->prog_instructions[c->nr_fp_insns++];
190}
191
192static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193					const struct prog_instruction *inst0)
194{
195   struct prog_instruction *inst = get_fp_inst(c);
196   *inst = *inst0;
197   return inst;
198}
199
200static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201				       GLuint op,
202				       struct prog_dst_register dest,
203				       GLuint saturate,
204				       GLuint tex_src_unit,
205				       GLuint tex_src_target,
206				       GLuint tex_shadow,
207				       struct prog_src_register src0,
208				       struct prog_src_register src1,
209				       struct prog_src_register src2 )
210{
211   struct prog_instruction *inst = get_fp_inst(c);
212
213   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214          tex_src_unit == TEX_UNIT_NONE);
215   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216          tex_src_target == TEX_TARGET_NONE);
217
218   memset(inst, 0, sizeof(*inst));
219
220   inst->Opcode = op;
221   inst->DstReg = dest;
222   inst->SaturateMode = saturate;
223   inst->TexSrcUnit = tex_src_unit;
224   inst->TexSrcTarget = tex_src_target;
225   inst->TexShadow = tex_shadow;
226   inst->SrcReg[0] = src0;
227   inst->SrcReg[1] = src1;
228   inst->SrcReg[2] = src2;
229   return inst;
230}
231
232
233static struct prog_instruction * emit_op(struct brw_wm_compile *c,
234				       GLuint op,
235				       struct prog_dst_register dest,
236				       GLuint saturate,
237				       struct prog_src_register src0,
238				       struct prog_src_register src1,
239				       struct prog_src_register src2 )
240{
241   return emit_tex_op(c, op, dest, saturate,
242                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
243                      src0, src1, src2);
244}
245
246
247/* Many Mesa opcodes produce the same value across all the result channels.
248 * We'd rather not have to support that splatting in the opcode implementations,
249 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
250 * anyway.  We can easily get both by emitting the opcode to one channel, and
251 * then MOVing it to the others, which brw_wm_pass*.c already understands.
252 */
253static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
254						 const struct prog_instruction *inst0)
255{
256   struct prog_instruction *inst;
257   unsigned int dst_chan;
258   unsigned int other_channel_mask;
259
260   if (inst0->DstReg.WriteMask == 0)
261      return NULL;
262
263   dst_chan = ffs(inst0->DstReg.WriteMask) - 1;
264   inst = get_fp_inst(c);
265   *inst = *inst0;
266   inst->DstReg.WriteMask = 1 << dst_chan;
267
268   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
269   if (other_channel_mask != 0) {
270      inst = emit_op(c,
271		     OPCODE_MOV,
272		     dst_mask(inst0->DstReg, other_channel_mask),
273		     0,
274		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
275		     src_undef(),
276		     src_undef());
277   }
278   return inst;
279}
280
281
282/***********************************************************************
283 * Special instructions for interpolation and other tasks
284 */
285
286static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
287{
288   if (src_is_undef(c->pixel_xy)) {
289      struct prog_dst_register pixel_xy = get_temp(c);
290      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
291
292
293      /* Emit the out calculations, and hold onto the results.  Use
294       * two instructions as a temporary is required.
295       */
296      /* pixel_xy.xy = PIXELXY payload[0];
297       */
298      emit_op(c,
299	      WM_PIXELXY,
300	      dst_mask(pixel_xy, WRITEMASK_XY),
301	      0,
302	      payload_r0_depth,
303	      src_undef(),
304	      src_undef());
305
306      c->pixel_xy = src_reg_from_dst(pixel_xy);
307   }
308
309   return c->pixel_xy;
310}
311
312static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
313{
314   if (src_is_undef(c->delta_xy)) {
315      struct prog_dst_register delta_xy = get_temp(c);
316      struct prog_src_register pixel_xy = get_pixel_xy(c);
317      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
318
319      /* deltas.xy = DELTAXY pixel_xy, payload[0]
320       */
321      emit_op(c,
322	      WM_DELTAXY,
323	      dst_mask(delta_xy, WRITEMASK_XY),
324	      0,
325	      pixel_xy,
326	      payload_r0_depth,
327	      src_undef());
328
329      c->delta_xy = src_reg_from_dst(delta_xy);
330   }
331
332   return c->delta_xy;
333}
334
335static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
336{
337   /* This is called for producing 1/w in pre-gen6 interp.  for gen6,
338    * the interp opcodes don't use this argument.  But to keep the
339    * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
340    * into the slot.
341    */
342   if (c->func.brw->intel.gen >= 6)
343      return c->delta_xy;
344
345   if (src_is_undef(c->pixel_w)) {
346      struct prog_dst_register pixel_w = get_temp(c);
347      struct prog_src_register deltas = get_delta_xy(c);
348      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
349
350      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
351       */
352      emit_op(c,
353	      WM_PIXELW,
354	      dst_mask(pixel_w, WRITEMASK_W),
355	      0,
356	      interp_wpos,
357	      deltas,
358	      src_undef());
359
360
361      c->pixel_w = src_reg_from_dst(pixel_w);
362   }
363
364   return c->pixel_w;
365}
366
367static void emit_interp( struct brw_wm_compile *c,
368			 GLuint idx )
369{
370   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
371   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
372   struct prog_src_register deltas;
373
374   deltas = get_delta_xy(c);
375
376   /* Need to use PINTERP on attributes which have been
377    * multiplied by 1/W in the SF program, and LINTERP on those
378    * which have not:
379    */
380   switch (idx) {
381   case FRAG_ATTRIB_WPOS:
382      /* Have to treat wpos.xy specially:
383       */
384      emit_op(c,
385	      WM_WPOSXY,
386	      dst_mask(dst, WRITEMASK_XY),
387	      0,
388	      get_pixel_xy(c),
389	      src_undef(),
390	      src_undef());
391
392      dst = dst_mask(dst, WRITEMASK_ZW);
393
394      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
395       */
396      emit_op(c,
397	      WM_LINTERP,
398	      dst,
399	      0,
400	      interp,
401	      deltas,
402	      src_undef());
403      break;
404   case FRAG_ATTRIB_COL0:
405   case FRAG_ATTRIB_COL1:
406      if (c->key.flat_shade) {
407	 emit_op(c,
408		 WM_CINTERP,
409		 dst,
410		 0,
411		 interp,
412		 src_undef(),
413		 src_undef());
414      }
415      else {
416	 /* perspective-corrected color interpolation */
417	 emit_op(c,
418		 WM_PINTERP,
419		 dst,
420		 0,
421		 interp,
422		 deltas,
423		 get_pixel_w(c));
424      }
425      break;
426   case FRAG_ATTRIB_FOGC:
427      /* Interpolate the fog coordinate */
428      emit_op(c,
429	      WM_PINTERP,
430	      dst_mask(dst, WRITEMASK_X),
431	      0,
432	      interp,
433	      deltas,
434	      get_pixel_w(c));
435
436      emit_op(c,
437	      OPCODE_MOV,
438	      dst_mask(dst, WRITEMASK_YZW),
439	      0,
440	      src_swizzle(interp,
441			  SWIZZLE_ZERO,
442			  SWIZZLE_ZERO,
443			  SWIZZLE_ZERO,
444			  SWIZZLE_ONE),
445	      src_undef(),
446	      src_undef());
447      break;
448
449   case FRAG_ATTRIB_FACE:
450      emit_op(c,
451              WM_FRONTFACING,
452              dst_mask(dst, WRITEMASK_X),
453              0,
454              src_undef(),
455              src_undef(),
456              src_undef());
457      break;
458
459   case FRAG_ATTRIB_PNTC:
460      /* XXX review/test this case */
461      emit_op(c,
462	      WM_PINTERP,
463	      dst_mask(dst, WRITEMASK_XY),
464	      0,
465	      interp,
466	      deltas,
467	      get_pixel_w(c));
468
469      emit_op(c,
470	      OPCODE_MOV,
471	      dst_mask(dst, WRITEMASK_ZW),
472	      0,
473	      src_swizzle(interp,
474			  SWIZZLE_ZERO,
475			  SWIZZLE_ZERO,
476			  SWIZZLE_ZERO,
477			  SWIZZLE_ONE),
478	      src_undef(),
479	      src_undef());
480      break;
481
482   default:
483      emit_op(c,
484	      WM_PINTERP,
485	      dst,
486	      0,
487	      interp,
488	      deltas,
489	      get_pixel_w(c));
490      break;
491   }
492
493   c->fp_interp_emitted |= 1<<idx;
494}
495
496/***********************************************************************
497 * Hacks to extend the program parameter and constant lists.
498 */
499
500/* Add the fog parameters to the parameter list of the original
501 * program, rather than creating a new list.  Doesn't really do any
502 * harm and it's not as if the parameter handling isn't a big hack
503 * anyway.
504 */
505static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
506                                                     GLint s0,
507                                                     GLint s1,
508                                                     GLint s2,
509                                                     GLint s3,
510                                                     GLint s4)
511{
512   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
513   gl_state_index tokens[STATE_LENGTH];
514   GLuint idx;
515   tokens[0] = s0;
516   tokens[1] = s1;
517   tokens[2] = s2;
518   tokens[3] = s3;
519   tokens[4] = s4;
520
521   idx = _mesa_add_state_reference( paramList, tokens );
522
523   return src_reg(PROGRAM_STATE_VAR, idx);
524}
525
526
527static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
528						     GLfloat s0,
529						     GLfloat s1,
530						     GLfloat s2,
531						     GLfloat s3)
532{
533   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
534   gl_constant_value values[4];
535   GLuint idx;
536   GLuint swizzle;
537   struct prog_src_register reg;
538
539   values[0].f = s0;
540   values[1].f = s1;
541   values[2].f = s2;
542   values[3].f = s3;
543
544   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
545   reg = src_reg(PROGRAM_STATE_VAR, idx);
546   reg.Swizzle = swizzle;
547
548   return reg;
549}
550
551
552
553/***********************************************************************
554 * Expand various instructions here to simpler forms.
555 */
556static void precalc_dst( struct brw_wm_compile *c,
557			       const struct prog_instruction *inst )
558{
559   struct prog_src_register src0 = inst->SrcReg[0];
560   struct prog_src_register src1 = inst->SrcReg[1];
561   struct prog_dst_register dst = inst->DstReg;
562   struct prog_dst_register temp = get_temp(c);
563
564   if (dst.WriteMask & WRITEMASK_Y) {
565      /* dst.y = mul src0.y, src1.y
566       */
567      emit_op(c,
568	      OPCODE_MUL,
569	      dst_mask(temp, WRITEMASK_Y),
570	      inst->SaturateMode,
571	      src0,
572	      src1,
573	      src_undef());
574   }
575
576   if (dst.WriteMask & WRITEMASK_XZ) {
577      struct prog_instruction *swz;
578      GLuint z = GET_SWZ(src0.Swizzle, Z);
579
580      /* dst.xz = swz src0.1zzz
581       */
582      swz = emit_op(c,
583		    OPCODE_SWZ,
584		    dst_mask(temp, WRITEMASK_XZ),
585		    inst->SaturateMode,
586		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
587		    src_undef(),
588		    src_undef());
589      /* Avoid letting negation flag of src0 affect our 1 constant. */
590      swz->SrcReg[0].Negate &= ~NEGATE_X;
591   }
592   if (dst.WriteMask & WRITEMASK_W) {
593      /* dst.w = mov src1.w
594       */
595      emit_op(c,
596	      OPCODE_MOV,
597	      dst_mask(temp, WRITEMASK_W),
598	      inst->SaturateMode,
599	      src1,
600	      src_undef(),
601	      src_undef());
602   }
603
604   /* This will get optimized out in general, but it ensures that we
605    * don't overwrite src operands in our channel-wise splitting
606    * above.  See piglit fp-dst-aliasing-[12].
607    */
608   emit_op(c,
609	   OPCODE_MOV,
610	   dst,
611	   0,
612	   src_reg_from_dst(temp),
613	   src_undef(),
614	   src_undef());
615
616   release_temp(c, temp);
617}
618
619
620static void precalc_lit( struct brw_wm_compile *c,
621			 const struct prog_instruction *inst )
622{
623   struct prog_src_register src0 = inst->SrcReg[0];
624   struct prog_dst_register dst = inst->DstReg;
625
626   if (dst.WriteMask & WRITEMASK_YZ) {
627      emit_op(c,
628	      OPCODE_LIT,
629	      dst_mask(dst, WRITEMASK_YZ),
630	      inst->SaturateMode,
631	      src0,
632	      src_undef(),
633	      src_undef());
634   }
635
636   if (dst.WriteMask & WRITEMASK_XW) {
637      struct prog_instruction *swz;
638
639      /* dst.xw = swz src0.1111
640       */
641      swz = emit_op(c,
642		    OPCODE_SWZ,
643		    dst_mask(dst, WRITEMASK_XW),
644		    0,
645		    src_swizzle1(src0, SWIZZLE_ONE),
646		    src_undef(),
647		    src_undef());
648      /* Avoid letting the negation flag of src0 affect our 1 constant. */
649      swz->SrcReg[0].Negate = NEGATE_NONE;
650   }
651}
652
653
654/**
655 * Some TEX instructions require extra code, cube map coordinate
656 * normalization, or coordinate scaling for RECT textures, etc.
657 * This function emits those extra instructions and the TEX
658 * instruction itself.
659 */
660static void precalc_tex( struct brw_wm_compile *c,
661			 const struct prog_instruction *inst )
662{
663   struct brw_compile *p = &c->func;
664   struct intel_context *intel = &p->brw->intel;
665   struct prog_src_register coord;
666   struct prog_dst_register tmpcoord = { 0 };
667   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
668   struct prog_dst_register unswizzled_tmp;
669
670   /* If we are doing EXT_texture_swizzle, we need to write our result into a
671    * temporary, otherwise writemasking of the real dst could lose some of our
672    * channels.
673    */
674   if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) {
675      unswizzled_tmp = get_temp(c);
676   } else {
677      unswizzled_tmp = inst->DstReg;
678   }
679
680   assert(unit < BRW_MAX_TEX_UNIT);
681
682   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
683       struct prog_instruction *out;
684       struct prog_dst_register tmp0 = get_temp(c);
685       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
686       struct prog_dst_register tmp1 = get_temp(c);
687       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
688       struct prog_src_register src0 = inst->SrcReg[0];
689
690       /* find longest component of coord vector and normalize it */
691       tmpcoord = get_temp(c);
692       coord = src_reg_from_dst(tmpcoord);
693
694       /* tmpcoord = src0 (i.e.: coord = src0) */
695       out = emit_op(c, OPCODE_MOV,
696                     tmpcoord,
697                     0,
698                     src0,
699                     src_undef(),
700                     src_undef());
701       out->SrcReg[0].Negate = NEGATE_NONE;
702       out->SrcReg[0].Abs = 1;
703
704       /* tmp0 = MAX(coord.X, coord.Y) */
705       emit_op(c, OPCODE_MAX,
706               tmp0,
707               0,
708               src_swizzle1(coord, X),
709               src_swizzle1(coord, Y),
710               src_undef());
711
712       /* tmp1 = MAX(tmp0, coord.Z) */
713       emit_op(c, OPCODE_MAX,
714               tmp1,
715               0,
716               tmp0src,
717               src_swizzle1(coord, Z),
718               src_undef());
719
720       /* tmp0 = 1 / tmp1 */
721       emit_op(c, OPCODE_RCP,
722               dst_mask(tmp0, WRITEMASK_X),
723               0,
724               tmp1src,
725               src_undef(),
726               src_undef());
727
728       /* tmpCoord = src0 * tmp0 */
729       emit_op(c, OPCODE_MUL,
730               tmpcoord,
731               0,
732               src0,
733               src_swizzle1(tmp0src, SWIZZLE_X),
734               src_undef());
735
736       release_temp(c, tmp0);
737       release_temp(c, tmp1);
738   }
739   else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
740      struct prog_src_register scale =
741	 search_or_add_param5( c,
742			       STATE_INTERNAL,
743			       STATE_TEXRECT_SCALE,
744			       unit,
745			       0,0 );
746
747      tmpcoord = get_temp(c);
748
749      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
750       */
751      emit_op(c,
752	      OPCODE_MUL,
753	      tmpcoord,
754	      0,
755	      inst->SrcReg[0],
756	      src_swizzle(scale,
757			  SWIZZLE_X,
758			  SWIZZLE_Y,
759			  SWIZZLE_ONE,
760			  SWIZZLE_ONE),
761	      src_undef());
762
763      coord = src_reg_from_dst(tmpcoord);
764   }
765   else {
766      coord = inst->SrcReg[0];
767   }
768
769   /* Need to emit YUV texture conversions by hand.  Probably need to
770    * do this here - the alternative is in brw_wm_emit.c, but the
771    * conversion requires allocating a temporary variable which we
772    * don't have the facility to do that late in the compilation.
773    */
774   if (c->key.tex.yuvtex_mask & (1 << unit)) {
775      /* convert ycbcr to RGBA */
776      bool swap_uv = c->key.tex.yuvtex_swap_mask & (1 << unit);
777
778      /*
779	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
780	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
781	 UYV     = TEX ...
782	 UYV.xyz = ADD UYV,     C0
783	 UYV.y   = MUL UYV.y,   C0.w
784 	 if (UV swaped)
785	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
786	 else
787	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
788	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
789      */
790      struct prog_dst_register tmp = get_temp(c);
791      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
792      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
793      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
794
795      /* tmp     = TEX ...
796       */
797      emit_tex_op(c,
798                  OPCODE_TEX,
799                  tmp,
800                  inst->SaturateMode,
801                  unit,
802                  inst->TexSrcTarget,
803                  inst->TexShadow,
804                  coord,
805                  src_undef(),
806                  src_undef());
807
808      /* tmp.xyz =  ADD TMP, C0
809       */
810      emit_op(c,
811	      OPCODE_ADD,
812	      dst_mask(tmp, WRITEMASK_XYZ),
813	      0,
814	      tmpsrc,
815	      C0,
816	      src_undef());
817
818      /* YUV.y   = MUL YUV.y, C0.w
819       */
820
821      emit_op(c,
822	      OPCODE_MUL,
823	      dst_mask(tmp, WRITEMASK_Y),
824	      0,
825	      tmpsrc,
826	      src_swizzle1(C0, W),
827	      src_undef());
828
829      /*
830       * if (UV swaped)
831       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
832       * else
833       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
834       */
835
836      emit_op(c,
837	      OPCODE_MAD,
838	      dst_mask(unswizzled_tmp, WRITEMASK_XYZ),
839	      0,
840	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
841	      C1,
842	      src_swizzle1(tmpsrc, Y));
843
844      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
845       */
846      emit_op(c,
847	      OPCODE_MAD,
848	      dst_mask(unswizzled_tmp, WRITEMASK_Y),
849	      0,
850	      src_swizzle1(tmpsrc, Z),
851	      src_swizzle1(C1, W),
852	      src_swizzle1(src_reg_from_dst(unswizzled_tmp), Y));
853
854      release_temp(c, tmp);
855   }
856   else {
857      /* ordinary RGBA tex instruction */
858      emit_tex_op(c,
859                  OPCODE_TEX,
860                  unswizzled_tmp,
861                  inst->SaturateMode,
862                  unit,
863                  inst->TexSrcTarget,
864                  inst->TexShadow,
865                  coord,
866                  src_undef(),
867                  src_undef());
868   }
869
870   /* For GL_EXT_texture_swizzle: */
871   if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) {
872      /* swizzle the result of the TEX instruction */
873      struct prog_src_register tmpsrc = src_reg_from_dst(unswizzled_tmp);
874      emit_op(c, OPCODE_SWZ,
875              inst->DstReg,
876              SATURATE_OFF, /* saturate already done above */
877              src_swizzle4(tmpsrc, c->key.tex.swizzles[unit]),
878              src_undef(),
879              src_undef());
880   }
881
882   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
883       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
884      release_temp(c, tmpcoord);
885}
886
887
888/**
889 * Check if the given TXP instruction really needs the divide-by-W step.
890 */
891static bool
892projtex(struct brw_wm_compile *c, const struct prog_instruction *inst)
893{
894   const struct prog_src_register src = inst->SrcReg[0];
895   bool retVal;
896
897   assert(inst->Opcode == OPCODE_TXP);
898
899   /* Only try to detect the simplest cases.  Could detect (later)
900    * cases where we are trying to emit code like RCP {1.0}, MUL x,
901    * {1.0}, and so on.
902    *
903    * More complex cases than this typically only arise from
904    * user-provided fragment programs anyway:
905    */
906   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
907      retVal = false;  /* ut2004 gun rendering !?! */
908   else if (src.File == PROGRAM_INPUT &&
909	    GET_SWZ(src.Swizzle, W) == W &&
910            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
911      retVal = false;
912   else
913      retVal = true;
914
915   return retVal;
916}
917
918
919/**
920 * Emit code for TXP.
921 */
922static void precalc_txp( struct brw_wm_compile *c,
923			       const struct prog_instruction *inst )
924{
925   struct prog_src_register src0 = inst->SrcReg[0];
926
927   if (projtex(c, inst)) {
928      struct prog_dst_register tmp = get_temp(c);
929      struct prog_instruction tmp_inst;
930
931      /* tmp0.w = RCP inst.arg[0][3]
932       */
933      emit_op(c,
934	      OPCODE_RCP,
935	      dst_mask(tmp, WRITEMASK_W),
936	      0,
937	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
938	      src_undef(),
939	      src_undef());
940
941      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
942       */
943      emit_op(c,
944	      OPCODE_MUL,
945	      dst_mask(tmp, WRITEMASK_XYZ),
946	      0,
947	      src0,
948	      src_swizzle1(src_reg_from_dst(tmp), W),
949	      src_undef());
950
951      /* dst = precalc(TEX tmp0)
952       */
953      tmp_inst = *inst;
954      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
955      precalc_tex(c, &tmp_inst);
956
957      release_temp(c, tmp);
958   }
959   else
960   {
961      /* dst = precalc(TEX src0)
962       */
963      precalc_tex(c, inst);
964   }
965}
966
967
968
969static void emit_render_target_writes( struct brw_wm_compile *c )
970{
971   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
972   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
973   struct prog_src_register outcolor;
974   GLuint i;
975
976   struct prog_instruction *inst = NULL;
977
978   /* The inst->Aux field is used for FB write target and the EOT marker */
979
980   for (i = 0; i < c->key.nr_color_regions; i++) {
981      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
982	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
983      } else {
984	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
985      }
986      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
987		     0, outcolor, payload_r0_depth, outdepth);
988      inst->Aux = INST_AUX_TARGET(i);
989   }
990
991   /* Mark the last FB write as final, or emit a dummy write if we had
992    * no render targets bound.
993    */
994   if (c->key.nr_color_regions != 0) {
995      inst->Aux |= INST_AUX_EOT;
996   } else {
997      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
998		     0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
999		     payload_r0_depth, outdepth);
1000      inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
1001   }
1002}
1003
1004
1005
1006
1007/***********************************************************************
1008 * Emit INTERP instructions ahead of first use of each attrib.
1009 */
1010
1011static void validate_src_regs( struct brw_wm_compile *c,
1012			       const struct prog_instruction *inst )
1013{
1014   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1015   GLuint i;
1016
1017   for (i = 0; i < nr_args; i++) {
1018      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1019	 GLuint idx = inst->SrcReg[i].Index;
1020	 if (!(c->fp_interp_emitted & (1<<idx))) {
1021	    emit_interp(c, idx);
1022	 }
1023      }
1024   }
1025}
1026
1027static void print_insns( const struct prog_instruction *insn,
1028			 GLuint nr )
1029{
1030   GLuint i;
1031   for (i = 0; i < nr; i++, insn++) {
1032      printf("%3d: ", i);
1033      if (insn->Opcode < MAX_OPCODE)
1034	 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1035      else if (insn->Opcode < MAX_WM_OPCODE) {
1036	 GLuint idx = insn->Opcode - MAX_OPCODE;
1037
1038	 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1039				      3, PROG_PRINT_DEBUG, NULL);
1040      }
1041      else
1042	 printf("965 Opcode %d\n", insn->Opcode);
1043   }
1044}
1045
1046
1047/**
1048 * Initial pass for fragment program code generation.
1049 * This function is used by both the GLSL and non-GLSL paths.
1050 */
1051void brw_wm_pass_fp( struct brw_wm_compile *c )
1052{
1053   struct intel_context *intel = &c->func.brw->intel;
1054   struct brw_fragment_program *fp = c->fp;
1055   GLuint insn;
1056
1057   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1058      printf("pre-fp:\n");
1059      _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1060			       true);
1061      printf("\n");
1062   }
1063
1064   c->pixel_xy = src_undef();
1065   if (intel->gen >= 6) {
1066      /* The interpolation deltas come in as the perspective pixel
1067       * location barycentric params.
1068       */
1069      c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1070   } else {
1071      c->delta_xy = src_undef();
1072   }
1073   c->pixel_w = src_undef();
1074   c->nr_fp_insns = 0;
1075
1076   /* Emit preamble instructions.  This is where special instructions such as
1077    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1078    * compute shader inputs from varying vars.
1079    */
1080   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1081      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1082      validate_src_regs(c, inst);
1083   }
1084
1085   /* Loop over all instructions doing assorted simplifications and
1086    * transformations.
1087    */
1088   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1089      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1090      struct prog_instruction *out;
1091
1092      /* Check for INPUT values, emit INTERP instructions where
1093       * necessary:
1094       */
1095
1096      switch (inst->Opcode) {
1097      case OPCODE_SWZ:
1098	 out = emit_insn(c, inst);
1099	 out->Opcode = OPCODE_MOV;
1100	 break;
1101
1102      case OPCODE_ABS:
1103	 out = emit_insn(c, inst);
1104	 out->Opcode = OPCODE_MOV;
1105	 out->SrcReg[0].Negate = NEGATE_NONE;
1106	 out->SrcReg[0].Abs = 1;
1107	 break;
1108
1109      case OPCODE_SUB:
1110	 out = emit_insn(c, inst);
1111	 out->Opcode = OPCODE_ADD;
1112	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1113	 break;
1114
1115      case OPCODE_SCS:
1116	 out = emit_insn(c, inst);
1117	 /* This should probably be done in the parser.
1118	  */
1119	 out->DstReg.WriteMask &= WRITEMASK_XY;
1120	 break;
1121
1122      case OPCODE_DST:
1123	 precalc_dst(c, inst);
1124	 break;
1125
1126      case OPCODE_LIT:
1127	 precalc_lit(c, inst);
1128	 break;
1129
1130      case OPCODE_RSQ:
1131	 out = emit_scalar_insn(c, inst);
1132	 out->SrcReg[0].Abs = true;
1133	 break;
1134
1135      case OPCODE_TEX:
1136	 precalc_tex(c, inst);
1137	 break;
1138
1139      case OPCODE_TXP:
1140	 precalc_txp(c, inst);
1141	 break;
1142
1143      case OPCODE_TXB:
1144	 out = emit_insn(c, inst);
1145	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1146         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1147	 break;
1148
1149      case OPCODE_XPD:
1150	 out = emit_insn(c, inst);
1151	 /* This should probably be done in the parser.
1152	  */
1153	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1154	 break;
1155
1156      case OPCODE_KIL:
1157	 out = emit_insn(c, inst);
1158	 /* This should probably be done in the parser.
1159	  */
1160	 out->DstReg.WriteMask = 0;
1161	 break;
1162      case OPCODE_END:
1163	 emit_render_target_writes(c);
1164	 break;
1165      case OPCODE_PRINT:
1166	 break;
1167      default:
1168	 if (brw_wm_is_scalar_result(inst->Opcode))
1169	    emit_scalar_insn(c, inst);
1170	 else
1171	    emit_insn(c, inst);
1172	 break;
1173      }
1174   }
1175
1176   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1177      printf("pass_fp:\n");
1178      print_insns( c->prog_instructions, c->nr_fp_insns );
1179      printf("\n");
1180   }
1181}
1182
1183