brw_wm_fp.c revision 3105bc1d885ea8ce083d2be85cbeac46d4d873a1
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "glheader.h"
34#include "macros.h"
35#include "enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "shader/prog_parameter.h"
41#include "shader/prog_print.h"
42#include "shader/prog_statevars.h"
43
44
/* Index of the first PROGRAM_TEMPORARY register handed out by
 * get_temp() for this pass's own scratch values.
 */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Component numbers, used when building or inspecting swizzles. */
enum {
   X = 0,
   Y = 1,
   Z = 2,
   W = 3
};
51
52
/* Printable names for the WM-specific opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE), so the entries presumably have
 * to stay in the same order as the WM_* opcode values -- confirm against
 * brw_wm.h when adding an opcode.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",   /* 0 */
   "DELTAXY",   /* 1 */
   "PIXELW",    /* 2 */
   "LINTERP",   /* 3 */
   "PINTERP",   /* 4 */
   "CINTERP",   /* 5 */
   "WPOSXY",    /* 6 */
   "FB_WRITE"   /* 7 */
};

/* Register-file names; currently compiled out. */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
69
70
71/***********************************************************************
72 * Source regs
73 */
74
75static struct prog_src_register src_reg(GLuint file, GLuint idx)
76{
77   struct prog_src_register reg;
78   reg.File = file;
79   reg.Index = idx;
80   reg.Swizzle = SWIZZLE_NOOP;
81   reg.RelAddr = 0;
82   reg.NegateBase = 0;
83   reg.Abs = 0;
84   reg.NegateAbs = 0;
85   return reg;
86}
87
88static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89{
90   return src_reg(dst.File, dst.Index);
91}
92
93static struct prog_src_register src_undef( void )
94{
95   return src_reg(PROGRAM_UNDEFINED, 0);
96}
97
98static GLboolean src_is_undef(struct prog_src_register src)
99{
100   return src.File == PROGRAM_UNDEFINED;
101}
102
103static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104{
105   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106   return reg;
107}
108
109static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110{
111   return src_swizzle(reg, x, x, x, x);
112}
113
114
115/***********************************************************************
116 * Dest regs
117 */
118
119static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
120{
121   struct prog_dst_register reg;
122   reg.File = file;
123   reg.Index = idx;
124   reg.WriteMask = WRITEMASK_XYZW;
125   reg.CondMask = 0;
126   reg.CondSwizzle = 0;
127   reg.pad = 0;
128   reg.CondSrc = 0;
129   return reg;
130}
131
132static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
133{
134   reg.WriteMask &= mask;
135   return reg;
136}
137
138static struct prog_dst_register dst_undef( void )
139{
140   return dst_reg(PROGRAM_UNDEFINED, 0);
141}
142
143
144
145static struct prog_dst_register get_temp( struct brw_wm_compile *c )
146{
147   int bit = _mesa_ffs( ~c->fp_temp );
148
149   if (!bit) {
150      _mesa_printf("%s: out of temporaries\n", __FILE__);
151      exit(1);
152   }
153
154   c->fp_temp |= 1<<(bit-1);
155   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
156}
157
158
159static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
160{
161   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
162}
163
164
165/***********************************************************************
166 * Instructions
167 */
168
169static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
170{
171   return &c->prog_instructions[c->nr_fp_insns++];
172}
173
174static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
175					const struct prog_instruction *inst0)
176{
177   struct prog_instruction *inst = get_fp_inst(c);
178   *inst = *inst0;
179   inst->Data = (void *)inst0;
180   return inst;
181}
182
183static struct prog_instruction * emit_op(struct brw_wm_compile *c,
184				       GLuint op,
185				       struct prog_dst_register dest,
186				       GLuint saturate,
187				       GLuint tex_src_unit,
188				       GLuint tex_src_target,
189				       struct prog_src_register src0,
190				       struct prog_src_register src1,
191				       struct prog_src_register src2 )
192{
193   struct prog_instruction *inst = get_fp_inst(c);
194
195   memset(inst, 0, sizeof(*inst));
196
197   inst->Opcode = op;
198   inst->DstReg = dest;
199   inst->SaturateMode = saturate;
200   inst->TexSrcUnit = tex_src_unit;
201   inst->TexSrcTarget = tex_src_target;
202   inst->SrcReg[0] = src0;
203   inst->SrcReg[1] = src1;
204   inst->SrcReg[2] = src2;
205   return inst;
206}
207
208
209
210
211/***********************************************************************
212 * Special instructions for interpolation and other tasks
213 */
214
215static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
216{
217   if (src_is_undef(c->pixel_xy)) {
218      struct prog_dst_register pixel_xy = get_temp(c);
219      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
220
221
222      /* Emit the out calculations, and hold onto the results.  Use
223       * two instructions as a temporary is required.
224       */
225      /* pixel_xy.xy = PIXELXY payload[0];
226       */
227      emit_op(c,
228	      WM_PIXELXY,
229	      dst_mask(pixel_xy, WRITEMASK_XY),
230	      0, 0, 0,
231	      payload_r0_depth,
232	      src_undef(),
233	      src_undef());
234
235      c->pixel_xy = src_reg_from_dst(pixel_xy);
236   }
237
238   return c->pixel_xy;
239}
240
241static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
242{
243   if (src_is_undef(c->delta_xy)) {
244      struct prog_dst_register delta_xy = get_temp(c);
245      struct prog_src_register pixel_xy = get_pixel_xy(c);
246      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
247
248      /* deltas.xy = DELTAXY pixel_xy, payload[0]
249       */
250      emit_op(c,
251	      WM_DELTAXY,
252	      dst_mask(delta_xy, WRITEMASK_XY),
253	      0, 0, 0,
254	      pixel_xy,
255	      payload_r0_depth,
256	      src_undef());
257
258      c->delta_xy = src_reg_from_dst(delta_xy);
259   }
260
261   return c->delta_xy;
262}
263
264static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
265{
266   if (src_is_undef(c->pixel_w)) {
267      struct prog_dst_register pixel_w = get_temp(c);
268      struct prog_src_register deltas = get_delta_xy(c);
269      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
270
271
272      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
273       */
274      emit_op(c,
275	      WM_PIXELW,
276	      dst_mask(pixel_w, WRITEMASK_W),
277	      0, 0, 0,
278	      interp_wpos,
279	      deltas,
280	      src_undef());
281
282
283      c->pixel_w = src_reg_from_dst(pixel_w);
284   }
285
286   return c->pixel_w;
287}
288
289static void emit_interp( struct brw_wm_compile *c,
290			 GLuint idx )
291{
292   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
293   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
294   struct prog_src_register deltas = get_delta_xy(c);
295   struct prog_src_register arg2;
296   GLuint opcode;
297
298   /* Need to use PINTERP on attributes which have been
299    * multiplied by 1/W in the SF program, and LINTERP on those
300    * which have not:
301    */
302   switch (idx) {
303   case FRAG_ATTRIB_WPOS:
304      opcode = WM_LINTERP;
305      arg2 = src_undef();
306
307      /* Have to treat wpos.xy specially:
308       */
309      emit_op(c,
310	      WM_WPOSXY,
311	      dst_mask(dst, WRITEMASK_XY),
312	      0, 0, 0,
313	      get_pixel_xy(c),
314	      src_undef(),
315	      src_undef());
316
317      dst = dst_mask(dst, WRITEMASK_ZW);
318
319      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
320       */
321      emit_op(c,
322	      WM_LINTERP,
323	      dst,
324	      0, 0, 0,
325	      interp,
326	      deltas,
327	      arg2);
328      break;
329   case FRAG_ATTRIB_COL0:
330   case FRAG_ATTRIB_COL1:
331      if (c->key.flat_shade) {
332	 emit_op(c,
333		 WM_CINTERP,
334		 dst,
335		 0, 0, 0,
336		 interp,
337		 src_undef(),
338		 src_undef());
339      }
340      else {
341	 emit_op(c,
342		 WM_LINTERP,
343		 dst,
344		 0, 0, 0,
345		 interp,
346		 deltas,
347		 src_undef());
348      }
349      break;
350   default:
351      emit_op(c,
352	      WM_PINTERP,
353	      dst,
354	      0, 0, 0,
355	      interp,
356	      deltas,
357	      get_pixel_w(c));
358      break;
359   }
360
361   c->fp_interp_emitted |= 1<<idx;
362}
363
364static void emit_ddx( struct brw_wm_compile *c,
365        const struct prog_instruction *inst )
366{
367    GLuint idx = inst->SrcReg[0].Index;
368    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
369
370    c->fp_deriv_emitted |= 1<<idx;
371    emit_op(c,
372            OPCODE_DDX,
373            inst->DstReg,
374            0, 0, 0,
375            interp,
376            get_pixel_w(c),
377            src_undef());
378}
379
380static void emit_ddy( struct brw_wm_compile *c,
381        const struct prog_instruction *inst )
382{
383    GLuint idx = inst->SrcReg[0].Index;
384    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
385
386    c->fp_deriv_emitted |= 1<<idx;
387    emit_op(c,
388            OPCODE_DDY,
389            inst->DstReg,
390            0, 0, 0,
391            interp,
392            get_pixel_w(c),
393            src_undef());
394}
395
396/***********************************************************************
397 * Hacks to extend the program parameter and constant lists.
398 */
399
400/* Add the fog parameters to the parameter list of the original
401 * program, rather than creating a new list.  Doesn't really do any
402 * harm and it's not as if the parameter handling isn't a big hack
403 * anyway.
404 */
405static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
406                                                     GLint s0,
407                                                     GLint s1,
408                                                     GLint s2,
409                                                     GLint s3,
410                                                     GLint s4)
411{
412   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
413   gl_state_index tokens[STATE_LENGTH];
414   GLuint idx;
415   tokens[0] = s0;
416   tokens[1] = s1;
417   tokens[2] = s2;
418   tokens[3] = s3;
419   tokens[4] = s4;
420
421   for (idx = 0; idx < paramList->NumParameters; idx++) {
422      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
423	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
424	 return src_reg(PROGRAM_STATE_VAR, idx);
425   }
426
427   idx = _mesa_add_state_reference( paramList, tokens );
428
429   /* Recalculate state dependency:
430    */
431   c->fp->param_state = paramList->StateFlags;
432
433   return src_reg(PROGRAM_STATE_VAR, idx);
434}
435
436
437static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
438						     GLfloat s0,
439						     GLfloat s1,
440						     GLfloat s2,
441						     GLfloat s3)
442{
443   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
444   GLfloat values[4];
445   GLuint idx;
446   GLuint swizzle;
447
448   values[0] = s0;
449   values[1] = s1;
450   values[2] = s2;
451   values[3] = s3;
452
453   /* Have to search, otherwise multiple compilations will each grow
454    * the parameter list.
455    */
456   for (idx = 0; idx < paramList->NumParameters; idx++) {
457      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
458	  memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
459
460	 /* XXX: this mimics the mesa bug which puts all constants and
461	  * parameters into the "PROGRAM_STATE_VAR" category:
462	  */
463	 return src_reg(PROGRAM_STATE_VAR, idx);
464   }
465
466   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
467   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
468   return src_reg(PROGRAM_STATE_VAR, idx);
469}
470
471
472
473/***********************************************************************
474 * Expand various instructions here to simpler forms.
475 */
476static void precalc_dst( struct brw_wm_compile *c,
477			       const struct prog_instruction *inst )
478{
479   struct prog_src_register src0 = inst->SrcReg[0];
480   struct prog_src_register src1 = inst->SrcReg[1];
481   struct prog_dst_register dst = inst->DstReg;
482
483   if (dst.WriteMask & WRITEMASK_Y) {
484      /* dst.y = mul src0.y, src1.y
485       */
486      emit_op(c,
487	      OPCODE_MUL,
488	      dst_mask(dst, WRITEMASK_Y),
489	      inst->SaturateMode, 0, 0,
490	      src0,
491	      src1,
492	      src_undef());
493   }
494
495
496   if (dst.WriteMask & WRITEMASK_XZ) {
497      GLuint z = GET_SWZ(src0.Swizzle, Z);
498
499      /* dst.xz = swz src0.1zzz
500       */
501      emit_op(c,
502	      OPCODE_SWZ,
503	      dst_mask(dst, WRITEMASK_XZ),
504	      inst->SaturateMode, 0, 0,
505	      src_swizzle(src0, SWIZZLE_ONE, z, z, z),
506	      src_undef(),
507	      src_undef());
508   }
509   if (dst.WriteMask & WRITEMASK_W) {
510      /* dst.w = mov src1.w
511       */
512      emit_op(c,
513	      OPCODE_MOV,
514	      dst_mask(dst, WRITEMASK_W),
515	      inst->SaturateMode, 0, 0,
516	      src1,
517	      src_undef(),
518	      src_undef());
519   }
520}
521
522
523static void precalc_lit( struct brw_wm_compile *c,
524			 const struct prog_instruction *inst )
525{
526   struct prog_src_register src0 = inst->SrcReg[0];
527   struct prog_dst_register dst = inst->DstReg;
528
529   if (dst.WriteMask & WRITEMASK_XW) {
530      /* dst.xw = swz src0.1111
531       */
532      emit_op(c,
533	      OPCODE_SWZ,
534	      dst_mask(dst, WRITEMASK_XW),
535	      0, 0, 0,
536	      src_swizzle1(src0, SWIZZLE_ONE),
537	      src_undef(),
538	      src_undef());
539   }
540
541
542   if (dst.WriteMask & WRITEMASK_YZ) {
543      emit_op(c,
544	      OPCODE_LIT,
545	      dst_mask(dst, WRITEMASK_YZ),
546	      inst->SaturateMode, 0, 0,
547	      src0,
548	      src_undef(),
549	      src_undef());
550   }
551}
552
553static void precalc_tex( struct brw_wm_compile *c,
554			 const struct prog_instruction *inst )
555{
556   struct prog_src_register coord;
557   struct prog_dst_register tmpcoord;
558
559   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
560       struct prog_instruction *out;
561       struct prog_dst_register tmp0 = get_temp(c);
562       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
563       struct prog_dst_register tmp1 = get_temp(c);
564       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
565       struct prog_src_register src0 = inst->SrcReg[0];
566
567       tmpcoord = get_temp(c);
568       coord = src_reg_from_dst(tmpcoord);
569
570       out = emit_op(c, OPCODE_MOV,
571                     tmpcoord,
572                     0, 0, 0,
573                     src0,
574                     src_undef(),
575                     src_undef());
576       out->SrcReg[0].NegateBase = 0;
577       out->SrcReg[0].Abs = 1;
578
579       emit_op(c, OPCODE_MAX,
580               tmp0,
581               0, 0, 0,
582               src_swizzle1(coord, X),
583               src_swizzle1(coord, Y),
584               src_undef());
585
586       emit_op(c, OPCODE_MAX,
587               tmp1,
588               0, 0, 0,
589               tmp0src,
590               src_swizzle1(coord, Z),
591               src_undef());
592
593       emit_op(c, OPCODE_RCP,
594               tmp0,
595               0, 0, 0,
596               tmp1src,
597               src_undef(),
598               src_undef());
599
600       emit_op(c, OPCODE_MUL,
601               tmpcoord,
602               0, 0, 0,
603               src0,
604               tmp0src,
605               src_undef());
606
607       release_temp(c, tmp0);
608       release_temp(c, tmp1);
609   } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
610      struct prog_src_register scale =
611	 search_or_add_param5( c,
612			       STATE_INTERNAL,
613			       STATE_TEXRECT_SCALE,
614			       inst->TexSrcUnit,
615			       0,0 );
616
617      tmpcoord = get_temp(c);
618
619      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
620       */
621      emit_op(c,
622	      OPCODE_MUL,
623	      tmpcoord,
624	      0, 0, 0,
625	      inst->SrcReg[0],
626	      scale,
627	      src_undef());
628
629      coord = src_reg_from_dst(tmpcoord);
630   }
631   else {
632      coord = inst->SrcReg[0];
633   }
634
635   /* Need to emit YUV texture conversions by hand.  Probably need to
636    * do this here - the alternative is in brw_wm_emit.c, but the
637    * conversion requires allocating a temporary variable which we
638    * don't have the facility to do that late in the compilation.
639    */
640   if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) {
641      emit_op(c,
642	      OPCODE_TEX,
643	      inst->DstReg,
644	      inst->SaturateMode,
645	      inst->TexSrcUnit,
646	      inst->TexSrcTarget,
647	      coord,
648	      src_undef(),
649	      src_undef());
650   }
651   else {
652       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<inst->TexSrcUnit);
653
654      /*
655	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
656	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
657	 UYV     = TEX ...
658	 UYV.xyz = ADD UYV,     C0
659	 UYV.y   = MUL UYV.y,   C0.w
660 	 if (UV swaped)
661	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
662	 else
663	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
664	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
665      */
666      struct prog_dst_register dst = inst->DstReg;
667      struct prog_dst_register tmp = get_temp(c);
668      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
669      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
670      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
671
672      /* tmp     = TEX ...
673       */
674      emit_op(c,
675	      OPCODE_TEX,
676	      tmp,
677	      inst->SaturateMode,
678	      inst->TexSrcUnit,
679	      inst->TexSrcTarget,
680	      coord,
681	      src_undef(),
682	      src_undef());
683
684      /* tmp.xyz =  ADD TMP, C0
685       */
686      emit_op(c,
687	      OPCODE_ADD,
688	      dst_mask(tmp, WRITEMASK_XYZ),
689	      0, 0, 0,
690	      tmpsrc,
691	      C0,
692	      src_undef());
693
694      /* YUV.y   = MUL YUV.y, C0.w
695       */
696
697      emit_op(c,
698	      OPCODE_MUL,
699	      dst_mask(tmp, WRITEMASK_Y),
700	      0, 0, 0,
701	      tmpsrc,
702	      src_swizzle1(C0, W),
703	      src_undef());
704
705      /*
706       * if (UV swaped)
707       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
708       * else
709       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
710       */
711
712      emit_op(c,
713	      OPCODE_MAD,
714	      dst_mask(dst, WRITEMASK_XYZ),
715	      0, 0, 0,
716	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
717	      C1,
718	      src_swizzle1(tmpsrc, Y));
719
720      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
721       */
722      emit_op(c,
723	      OPCODE_MAD,
724	      dst_mask(dst, WRITEMASK_Y),
725	      0, 0, 0,
726	      src_swizzle1(tmpsrc, Z),
727	      src_swizzle1(C1, W),
728	      src_swizzle1(src_reg_from_dst(dst), Y));
729
730      release_temp(c, tmp);
731   }
732
733   if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV)
734      release_temp(c, tmpcoord);
735}
736
737
738static GLboolean projtex( struct brw_wm_compile *c,
739			  const struct prog_instruction *inst )
740{
741   struct prog_src_register src = inst->SrcReg[0];
742
743   /* Only try to detect the simplest cases.  Could detect (later)
744    * cases where we are trying to emit code like RCP {1.0}, MUL x,
745    * {1.0}, and so on.
746    *
747    * More complex cases than this typically only arise from
748    * user-provided fragment programs anyway:
749    */
750   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
751      return 0;  /* ut2004 gun rendering !?! */
752   else if (src.File == PROGRAM_INPUT &&
753	    GET_SWZ(src.Swizzle, W) == W &&
754           (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
755      return 0;
756   else
757      return 1;
758}
759
760
761static void precalc_txp( struct brw_wm_compile *c,
762			       const struct prog_instruction *inst )
763{
764   struct prog_src_register src0 = inst->SrcReg[0];
765
766   if (projtex(c, inst)) {
767      struct prog_dst_register tmp = get_temp(c);
768      struct prog_instruction tmp_inst;
769
770      /* tmp0.w = RCP inst.arg[0][3]
771       */
772      emit_op(c,
773	      OPCODE_RCP,
774	      dst_mask(tmp, WRITEMASK_W),
775	      0, 0, 0,
776	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
777	      src_undef(),
778	      src_undef());
779
780      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
781       */
782      emit_op(c,
783	      OPCODE_MUL,
784	      dst_mask(tmp, WRITEMASK_XYZ),
785	      0, 0, 0,
786	      src0,
787	      src_swizzle1(src_reg_from_dst(tmp), W),
788	      src_undef());
789
790      /* dst = precalc(TEX tmp0)
791       */
792      tmp_inst = *inst;
793      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
794      precalc_tex(c, &tmp_inst);
795
796      release_temp(c, tmp);
797   }
798   else
799   {
800      /* dst = precalc(TEX src0)
801       */
802      precalc_tex(c, inst);
803   }
804}
805
806
807
808
809
810/***********************************************************************
811 * Add instructions to perform fog blending
812 */
813
814static void fog_blend( struct brw_wm_compile *c,
815			     struct prog_src_register fog_factor )
816{
817   struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
818   struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
819
820   /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
821
822   emit_op(c,
823	   OPCODE_LRP,
824	   dst_mask(outcolor, WRITEMASK_XYZ),
825	   0, 0, 0,
826	   fog_factor,
827	   src_reg_from_dst(outcolor),
828	   fogcolor);
829}
830
831
832
833/* This one is simple - just take the interpolated fog coordinate and
834 * use it as the fog blend factor.
835 */
836static void fog_interpolated( struct brw_wm_compile *c )
837{
838   struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
839
840   if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC)))
841      emit_interp(c, FRAG_ATTRIB_FOGC);
842
843   fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X)));
844}
845
846static void emit_fog( struct brw_wm_compile *c )
847{
848   if (!c->fp->program.FogOption)
849      return;
850
851   if (1)
852      fog_interpolated( c );
853   else {
854      /* TODO: per-pixel fog */
855      assert(0);
856   }
857}
858
859static void emit_fb_write( struct brw_wm_compile *c )
860{
861   struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
862   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
863   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
864   GLuint i;
865
866   struct prog_instruction *inst;
867   struct brw_context *brw = c->func.brw;
868
869   /* inst->Sampler is not used by backend,
870      use it for fb write target and eot */
871
872   inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
873           0, 0, 0, outcolor, payload_r0_depth, outdepth);
874   inst->Sampler = (brw->state.nr_draw_regions > 1 ? 0: 1)|(0<<1);
875
876   if (brw->state.nr_draw_regions > 1) {
877       for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
878	   outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
879	   inst = emit_op(c,
880		   WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
881		   outcolor, payload_r0_depth, outdepth);
882	   inst->Sampler = ((i == brw->state.nr_draw_regions - 1) ? 1: 0);
883	   inst->Sampler |= (i<<1);
884       }
885   }
886}
887
888
889
890
891/***********************************************************************
892 * Emit INTERP instructions ahead of first use of each attrib.
893 */
894
895static void validate_src_regs( struct brw_wm_compile *c,
896			       const struct prog_instruction *inst )
897{
898   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
899   GLuint i;
900
901   for (i = 0; i < nr_args; i++) {
902      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
903	 GLuint idx = inst->SrcReg[i].Index;
904	 if (!(c->fp_interp_emitted & (1<<idx))) {
905	    emit_interp(c, idx);
906	 }
907      }
908   }
909}
910
911
912
913static void print_insns( const struct prog_instruction *insn,
914			 GLuint nr )
915{
916   GLuint i;
917   for (i = 0; i < nr; i++, insn++) {
918      _mesa_printf("%3d: ", i);
919      if (insn->Opcode < MAX_OPCODE)
920	 _mesa_print_instruction(insn);
921      else if (insn->Opcode < MAX_WM_OPCODE) {
922	 GLuint idx = insn->Opcode - MAX_OPCODE;
923
924	 _mesa_print_alu_instruction(insn,
925				     wm_opcode_strings[idx],
926				     3);
927      }
928      else
929	 _mesa_printf("UNKNOWN\n");
930
931   }
932}
933
934void brw_wm_pass_fp( struct brw_wm_compile *c )
935{
936   struct brw_fragment_program *fp = c->fp;
937   GLuint insn;
938
939   if (INTEL_DEBUG & DEBUG_WM) {
940      _mesa_printf("\n\n\npre-fp:\n");
941      _mesa_print_program(&fp->program.Base);
942      _mesa_printf("\n");
943   }
944
945   c->pixel_xy = src_undef();
946   c->delta_xy = src_undef();
947   c->pixel_w = src_undef();
948   c->nr_fp_insns = 0;
949
950   /* Emit preamble instructions:
951    */
952
953
954   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
955      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
956      struct prog_instruction *out;
957
958      /* Check for INPUT values, emit INTERP instructions where
959       * necessary:
960       */
961      validate_src_regs(c, inst);
962
963
964      switch (inst->Opcode) {
965      case OPCODE_SWZ:
966	 out = emit_insn(c, inst);
967	 out->Opcode = OPCODE_MOV;
968	 break;
969
970      case OPCODE_ABS:
971	 out = emit_insn(c, inst);
972	 out->Opcode = OPCODE_MOV;
973	 out->SrcReg[0].NegateBase = 0;
974	 out->SrcReg[0].Abs = 1;
975	 break;
976
977      case OPCODE_SUB:
978	 out = emit_insn(c, inst);
979	 out->Opcode = OPCODE_ADD;
980	 out->SrcReg[1].NegateBase ^= 0xf;
981	 break;
982
983      case OPCODE_SCS:
984	 out = emit_insn(c, inst);
985	 /* This should probably be done in the parser.
986	  */
987	 out->DstReg.WriteMask &= WRITEMASK_XY;
988	 break;
989
990      case OPCODE_DST:
991	 precalc_dst(c, inst);
992	 break;
993
994      case OPCODE_LIT:
995	 precalc_lit(c, inst);
996	 break;
997
998      case OPCODE_TXP:
999	 precalc_txp(c, inst);
1000	 break;
1001
1002      case OPCODE_XPD:
1003	 out = emit_insn(c, inst);
1004	 /* This should probably be done in the parser.
1005	  */
1006	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1007	 break;
1008
1009      case OPCODE_KIL:
1010	 out = emit_insn(c, inst);
1011	 /* This should probably be done in the parser.
1012	  */
1013	 out->DstReg.WriteMask = 0;
1014	 break;
1015      case OPCODE_DDX:
1016	 emit_ddx(c, inst);
1017	 break;
1018      case OPCODE_DDY:
1019         emit_ddy(c, inst);
1020	break;
1021      case OPCODE_END:
1022	 emit_fog(c);
1023	 emit_fb_write(c);
1024	 break;
1025      case OPCODE_PRINT:
1026	 break;
1027
1028      default:
1029	 emit_insn(c, inst);
1030	 break;
1031      }
1032   }
1033
1034   if (INTEL_DEBUG & DEBUG_WM) {
1035	   _mesa_printf("\n\n\npass_fp:\n");
1036	   print_insns( c->prog_instructions, c->nr_fp_insns );
1037	   _mesa_printf("\n");
1038   }
1039}
1040
1041