brw_wm_fp.c revision 9e7903e492ad842481a166484e0474dd4f3100ba
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "shader/prog_parameter.h"
41#include "shader/prog_print.h"
42#include "shader/prog_statevars.h"
43
44
45#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
46
47#define X    0
48#define Y    1
49#define Z    2
50#define W    3
51
52
/* Printable names of the WM-private opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE), so the entries must stay in
 * the same order as the WM opcode values.  Both the strings and the
 * table itself are read-only.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE"
};
63
64#if 0
65static const char *wm_file_strings[] = {
66   "PAYLOAD"
67};
68#endif
69
70
71/***********************************************************************
72 * Source regs
73 */
74
75static struct prog_src_register src_reg(GLuint file, GLuint idx)
76{
77   struct prog_src_register reg;
78   reg.File = file;
79   reg.Index = idx;
80   reg.Swizzle = SWIZZLE_NOOP;
81   reg.RelAddr = 0;
82   reg.NegateBase = 0;
83   reg.Abs = 0;
84   reg.NegateAbs = 0;
85   return reg;
86}
87
88static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89{
90   return src_reg(dst.File, dst.Index);
91}
92
93static struct prog_src_register src_undef( void )
94{
95   return src_reg(PROGRAM_UNDEFINED, 0);
96}
97
98static GLboolean src_is_undef(struct prog_src_register src)
99{
100   return src.File == PROGRAM_UNDEFINED;
101}
102
103static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104{
105   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106   return reg;
107}
108
109static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110{
111   return src_swizzle(reg, x, x, x, x);
112}
113
114
115/***********************************************************************
116 * Dest regs
117 */
118
119static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
120{
121   struct prog_dst_register reg;
122   reg.File = file;
123   reg.Index = idx;
124   reg.WriteMask = WRITEMASK_XYZW;
125   reg.RelAddr = 0;
126   reg.CondMask = 0;
127   reg.CondSwizzle = 0;
128   reg.CondSrc = 0;
129   reg.pad = 0;
130   return reg;
131}
132
133static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
134{
135   reg.WriteMask &= mask;
136   return reg;
137}
138
139static struct prog_dst_register dst_undef( void )
140{
141   return dst_reg(PROGRAM_UNDEFINED, 0);
142}
143
144
145
146static struct prog_dst_register get_temp( struct brw_wm_compile *c )
147{
148   int bit = _mesa_ffs( ~c->fp_temp );
149
150   if (!bit) {
151      _mesa_printf("%s: out of temporaries\n", __FILE__);
152      exit(1);
153   }
154
155   c->fp_temp |= 1<<(bit-1);
156   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
157}
158
159
160static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
161{
162   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
163}
164
165
166/***********************************************************************
167 * Instructions
168 */
169
170static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
171{
172   return &c->prog_instructions[c->nr_fp_insns++];
173}
174
175static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
176					const struct prog_instruction *inst0)
177{
178   struct prog_instruction *inst = get_fp_inst(c);
179   *inst = *inst0;
180   inst->Data = (void *)inst0;
181   return inst;
182}
183
184static struct prog_instruction * emit_op(struct brw_wm_compile *c,
185				       GLuint op,
186				       struct prog_dst_register dest,
187				       GLuint saturate,
188				       GLuint tex_src_unit,
189				       GLuint tex_src_target,
190				       struct prog_src_register src0,
191				       struct prog_src_register src1,
192				       struct prog_src_register src2 )
193{
194   struct prog_instruction *inst = get_fp_inst(c);
195
196   memset(inst, 0, sizeof(*inst));
197
198   inst->Opcode = op;
199   inst->DstReg = dest;
200   inst->SaturateMode = saturate;
201   inst->TexSrcUnit = tex_src_unit;
202   inst->TexSrcTarget = tex_src_target;
203   inst->SrcReg[0] = src0;
204   inst->SrcReg[1] = src1;
205   inst->SrcReg[2] = src2;
206   return inst;
207}
208
209
210
211
212/***********************************************************************
213 * Special instructions for interpolation and other tasks
214 */
215
216static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
217{
218   if (src_is_undef(c->pixel_xy)) {
219      struct prog_dst_register pixel_xy = get_temp(c);
220      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
221
222
223      /* Emit the out calculations, and hold onto the results.  Use
224       * two instructions as a temporary is required.
225       */
226      /* pixel_xy.xy = PIXELXY payload[0];
227       */
228      emit_op(c,
229	      WM_PIXELXY,
230	      dst_mask(pixel_xy, WRITEMASK_XY),
231	      0, 0, 0,
232	      payload_r0_depth,
233	      src_undef(),
234	      src_undef());
235
236      c->pixel_xy = src_reg_from_dst(pixel_xy);
237   }
238
239   return c->pixel_xy;
240}
241
242static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
243{
244   if (src_is_undef(c->delta_xy)) {
245      struct prog_dst_register delta_xy = get_temp(c);
246      struct prog_src_register pixel_xy = get_pixel_xy(c);
247      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
248
249      /* deltas.xy = DELTAXY pixel_xy, payload[0]
250       */
251      emit_op(c,
252	      WM_DELTAXY,
253	      dst_mask(delta_xy, WRITEMASK_XY),
254	      0, 0, 0,
255	      pixel_xy,
256	      payload_r0_depth,
257	      src_undef());
258
259      c->delta_xy = src_reg_from_dst(delta_xy);
260   }
261
262   return c->delta_xy;
263}
264
265static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
266{
267   if (src_is_undef(c->pixel_w)) {
268      struct prog_dst_register pixel_w = get_temp(c);
269      struct prog_src_register deltas = get_delta_xy(c);
270      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
271
272
273      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
274       */
275      emit_op(c,
276	      WM_PIXELW,
277	      dst_mask(pixel_w, WRITEMASK_W),
278	      0, 0, 0,
279	      interp_wpos,
280	      deltas,
281	      src_undef());
282
283
284      c->pixel_w = src_reg_from_dst(pixel_w);
285   }
286
287   return c->pixel_w;
288}
289
290static void emit_interp( struct brw_wm_compile *c,
291			 GLuint idx )
292{
293   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
294   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
295   struct prog_src_register deltas = get_delta_xy(c);
296   struct prog_src_register arg2;
297   GLuint opcode;
298
299   /* Need to use PINTERP on attributes which have been
300    * multiplied by 1/W in the SF program, and LINTERP on those
301    * which have not:
302    */
303   switch (idx) {
304   case FRAG_ATTRIB_WPOS:
305      opcode = WM_LINTERP;
306      arg2 = src_undef();
307
308      /* Have to treat wpos.xy specially:
309       */
310      emit_op(c,
311	      WM_WPOSXY,
312	      dst_mask(dst, WRITEMASK_XY),
313	      0, 0, 0,
314	      get_pixel_xy(c),
315	      src_undef(),
316	      src_undef());
317
318      dst = dst_mask(dst, WRITEMASK_ZW);
319
320      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
321       */
322      emit_op(c,
323	      WM_LINTERP,
324	      dst,
325	      0, 0, 0,
326	      interp,
327	      deltas,
328	      arg2);
329      break;
330   case FRAG_ATTRIB_COL0:
331   case FRAG_ATTRIB_COL1:
332      if (c->key.flat_shade) {
333	 emit_op(c,
334		 WM_CINTERP,
335		 dst,
336		 0, 0, 0,
337		 interp,
338		 src_undef(),
339		 src_undef());
340      }
341      else {
342	 emit_op(c,
343		 WM_LINTERP,
344		 dst,
345		 0, 0, 0,
346		 interp,
347		 deltas,
348		 src_undef());
349      }
350      break;
351   default:
352      emit_op(c,
353	      WM_PINTERP,
354	      dst,
355	      0, 0, 0,
356	      interp,
357	      deltas,
358	      get_pixel_w(c));
359      break;
360   }
361
362   c->fp_interp_emitted |= 1<<idx;
363}
364
365static void emit_ddx( struct brw_wm_compile *c,
366        const struct prog_instruction *inst )
367{
368    GLuint idx = inst->SrcReg[0].Index;
369    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
370
371    c->fp_deriv_emitted |= 1<<idx;
372    emit_op(c,
373            OPCODE_DDX,
374            inst->DstReg,
375            0, 0, 0,
376            interp,
377            get_pixel_w(c),
378            src_undef());
379}
380
381static void emit_ddy( struct brw_wm_compile *c,
382        const struct prog_instruction *inst )
383{
384    GLuint idx = inst->SrcReg[0].Index;
385    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
386
387    c->fp_deriv_emitted |= 1<<idx;
388    emit_op(c,
389            OPCODE_DDY,
390            inst->DstReg,
391            0, 0, 0,
392            interp,
393            get_pixel_w(c),
394            src_undef());
395}
396
397/***********************************************************************
398 * Hacks to extend the program parameter and constant lists.
399 */
400
401/* Add the fog parameters to the parameter list of the original
402 * program, rather than creating a new list.  Doesn't really do any
403 * harm and it's not as if the parameter handling isn't a big hack
404 * anyway.
405 */
406static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
407                                                     GLint s0,
408                                                     GLint s1,
409                                                     GLint s2,
410                                                     GLint s3,
411                                                     GLint s4)
412{
413   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
414   gl_state_index tokens[STATE_LENGTH];
415   GLuint idx;
416   tokens[0] = s0;
417   tokens[1] = s1;
418   tokens[2] = s2;
419   tokens[3] = s3;
420   tokens[4] = s4;
421
422   for (idx = 0; idx < paramList->NumParameters; idx++) {
423      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
424	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
425	 return src_reg(PROGRAM_STATE_VAR, idx);
426   }
427
428   idx = _mesa_add_state_reference( paramList, tokens );
429
430   return src_reg(PROGRAM_STATE_VAR, idx);
431}
432
433
434static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
435						     GLfloat s0,
436						     GLfloat s1,
437						     GLfloat s2,
438						     GLfloat s3)
439{
440   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
441   GLfloat values[4];
442   GLuint idx;
443   GLuint swizzle;
444
445   values[0] = s0;
446   values[1] = s1;
447   values[2] = s2;
448   values[3] = s3;
449
450   /* Have to search, otherwise multiple compilations will each grow
451    * the parameter list.
452    */
453   for (idx = 0; idx < paramList->NumParameters; idx++) {
454      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
455	  memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
456
457	 /* XXX: this mimics the mesa bug which puts all constants and
458	  * parameters into the "PROGRAM_STATE_VAR" category:
459	  */
460	 return src_reg(PROGRAM_STATE_VAR, idx);
461   }
462
463   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
464   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
465   return src_reg(PROGRAM_STATE_VAR, idx);
466}
467
468
469
470/***********************************************************************
471 * Expand various instructions here to simpler forms.
472 */
473static void precalc_dst( struct brw_wm_compile *c,
474			       const struct prog_instruction *inst )
475{
476   struct prog_src_register src0 = inst->SrcReg[0];
477   struct prog_src_register src1 = inst->SrcReg[1];
478   struct prog_dst_register dst = inst->DstReg;
479
480   if (dst.WriteMask & WRITEMASK_Y) {
481      /* dst.y = mul src0.y, src1.y
482       */
483      emit_op(c,
484	      OPCODE_MUL,
485	      dst_mask(dst, WRITEMASK_Y),
486	      inst->SaturateMode, 0, 0,
487	      src0,
488	      src1,
489	      src_undef());
490   }
491
492
493   if (dst.WriteMask & WRITEMASK_XZ) {
494      struct prog_instruction *swz;
495      GLuint z = GET_SWZ(src0.Swizzle, Z);
496
497      /* dst.xz = swz src0.1zzz
498       */
499      swz = emit_op(c,
500		    OPCODE_SWZ,
501		    dst_mask(dst, WRITEMASK_XZ),
502		    inst->SaturateMode, 0, 0,
503		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
504		    src_undef(),
505		    src_undef());
506      /* Avoid letting negation flag of src0 affect our 1 constant. */
507      swz->SrcReg[0].NegateBase &= ~NEGATE_X;
508   }
509   if (dst.WriteMask & WRITEMASK_W) {
510      /* dst.w = mov src1.w
511       */
512      emit_op(c,
513	      OPCODE_MOV,
514	      dst_mask(dst, WRITEMASK_W),
515	      inst->SaturateMode, 0, 0,
516	      src1,
517	      src_undef(),
518	      src_undef());
519   }
520}
521
522
523static void precalc_lit( struct brw_wm_compile *c,
524			 const struct prog_instruction *inst )
525{
526   struct prog_src_register src0 = inst->SrcReg[0];
527   struct prog_dst_register dst = inst->DstReg;
528
529   if (dst.WriteMask & WRITEMASK_XW) {
530      struct prog_instruction *swz;
531
532      /* dst.xw = swz src0.1111
533       */
534      swz = emit_op(c,
535		    OPCODE_SWZ,
536		    dst_mask(dst, WRITEMASK_XW),
537		    0, 0, 0,
538		    src_swizzle1(src0, SWIZZLE_ONE),
539		    src_undef(),
540		    src_undef());
541      /* Avoid letting the negation flag of src0 affect our 1 constant. */
542      swz->SrcReg[0].NegateBase = 0;
543   }
544
545
546   if (dst.WriteMask & WRITEMASK_YZ) {
547      emit_op(c,
548	      OPCODE_LIT,
549	      dst_mask(dst, WRITEMASK_YZ),
550	      inst->SaturateMode, 0, 0,
551	      src0,
552	      src_undef(),
553	      src_undef());
554   }
555}
556
557
558/**
559 * Some TEX instructions require extra code, cube map coordinate
560 * normalization, or coordinate scaling for RECT textures, etc.
561 * This function emits those extra instructions and the TEX
562 * instruction itself.
563 */
564static void precalc_tex( struct brw_wm_compile *c,
565			 const struct prog_instruction *inst )
566{
567   struct prog_src_register coord;
568   struct prog_dst_register tmpcoord;
569   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
570
571   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
572       struct prog_instruction *out;
573       struct prog_dst_register tmp0 = get_temp(c);
574       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
575       struct prog_dst_register tmp1 = get_temp(c);
576       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
577       struct prog_src_register src0 = inst->SrcReg[0];
578
579       /* find longest component of coord vector and normalize it */
580       tmpcoord = get_temp(c);
581       coord = src_reg_from_dst(tmpcoord);
582
583       /* tmpcoord = src0 (i.e.: coord = src0) */
584       out = emit_op(c, OPCODE_MOV,
585                     tmpcoord,
586                     0, 0, 0,
587                     src0,
588                     src_undef(),
589                     src_undef());
590       out->SrcReg[0].NegateBase = 0;
591       out->SrcReg[0].Abs = 1;
592
593       /* tmp0 = MAX(coord.X, coord.Y) */
594       emit_op(c, OPCODE_MAX,
595               tmp0,
596               0, 0, 0,
597               src_swizzle1(coord, X),
598               src_swizzle1(coord, Y),
599               src_undef());
600
601       /* tmp1 = MAX(tmp0, coord.Z) */
602       emit_op(c, OPCODE_MAX,
603               tmp1,
604               0, 0, 0,
605               tmp0src,
606               src_swizzle1(coord, Z),
607               src_undef());
608
609       /* tmp0 = 1 / tmp1 */
610       emit_op(c, OPCODE_RCP,
611               tmp0,
612               0, 0, 0,
613               tmp1src,
614               src_undef(),
615               src_undef());
616
617       /* tmpCoord = src0 * tmp0 */
618       emit_op(c, OPCODE_MUL,
619               tmpcoord,
620               0, 0, 0,
621               src0,
622               tmp0src,
623               src_undef());
624
625       release_temp(c, tmp0);
626       release_temp(c, tmp1);
627   }
628   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
629      struct prog_src_register scale =
630	 search_or_add_param5( c,
631			       STATE_INTERNAL,
632			       STATE_TEXRECT_SCALE,
633			       unit,
634			       0,0 );
635
636      tmpcoord = get_temp(c);
637
638      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
639       */
640      emit_op(c,
641	      OPCODE_MUL,
642	      tmpcoord,
643	      0, 0, 0,
644	      inst->SrcReg[0],
645	      scale,
646	      src_undef());
647
648      coord = src_reg_from_dst(tmpcoord);
649   }
650   else {
651      coord = inst->SrcReg[0];
652   }
653
654   /* Need to emit YUV texture conversions by hand.  Probably need to
655    * do this here - the alternative is in brw_wm_emit.c, but the
656    * conversion requires allocating a temporary variable which we
657    * don't have the facility to do that late in the compilation.
658    */
659   if (c->key.yuvtex_mask & (1 << unit)) {
660      /* convert ycbcr to RGBA */
661      GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
662
663      /*
664	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
665	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
666	 UYV     = TEX ...
667	 UYV.xyz = ADD UYV,     C0
668	 UYV.y   = MUL UYV.y,   C0.w
669 	 if (UV swaped)
670	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
671	 else
672	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
673	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
674      */
675      struct prog_dst_register dst = inst->DstReg;
676      struct prog_dst_register tmp = get_temp(c);
677      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
678      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
679      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
680
681      /* tmp     = TEX ...
682       */
683      emit_op(c,
684	      OPCODE_TEX,
685	      tmp,
686	      inst->SaturateMode,
687	      unit,
688	      inst->TexSrcTarget,
689	      coord,
690	      src_undef(),
691	      src_undef());
692
693      /* tmp.xyz =  ADD TMP, C0
694       */
695      emit_op(c,
696	      OPCODE_ADD,
697	      dst_mask(tmp, WRITEMASK_XYZ),
698	      0, 0, 0,
699	      tmpsrc,
700	      C0,
701	      src_undef());
702
703      /* YUV.y   = MUL YUV.y, C0.w
704       */
705
706      emit_op(c,
707	      OPCODE_MUL,
708	      dst_mask(tmp, WRITEMASK_Y),
709	      0, 0, 0,
710	      tmpsrc,
711	      src_swizzle1(C0, W),
712	      src_undef());
713
714      /*
715       * if (UV swaped)
716       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
717       * else
718       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
719       */
720
721      emit_op(c,
722	      OPCODE_MAD,
723	      dst_mask(dst, WRITEMASK_XYZ),
724	      0, 0, 0,
725	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
726	      C1,
727	      src_swizzle1(tmpsrc, Y));
728
729      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
730       */
731      emit_op(c,
732	      OPCODE_MAD,
733	      dst_mask(dst, WRITEMASK_Y),
734	      0, 0, 0,
735	      src_swizzle1(tmpsrc, Z),
736	      src_swizzle1(C1, W),
737	      src_swizzle1(src_reg_from_dst(dst), Y));
738
739      release_temp(c, tmp);
740   }
741   else {
742      /* ordinary RGBA tex instruction */
743      emit_op(c,
744	      OPCODE_TEX,
745	      inst->DstReg,
746	      inst->SaturateMode,
747	      unit,
748	      inst->TexSrcTarget,
749	      coord,
750	      src_undef(),
751	      src_undef());
752   }
753
754   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
755       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
756      release_temp(c, tmpcoord);
757}
758
759
760static GLboolean projtex( struct brw_wm_compile *c,
761			  const struct prog_instruction *inst )
762{
763   struct prog_src_register src = inst->SrcReg[0];
764
765   /* Only try to detect the simplest cases.  Could detect (later)
766    * cases where we are trying to emit code like RCP {1.0}, MUL x,
767    * {1.0}, and so on.
768    *
769    * More complex cases than this typically only arise from
770    * user-provided fragment programs anyway:
771    */
772   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
773      return 0;  /* ut2004 gun rendering !?! */
774   else if (src.File == PROGRAM_INPUT &&
775	    GET_SWZ(src.Swizzle, W) == W &&
776           (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
777      return 0;
778   else
779      return 1;
780}
781
782
783static void precalc_txp( struct brw_wm_compile *c,
784			       const struct prog_instruction *inst )
785{
786   struct prog_src_register src0 = inst->SrcReg[0];
787
788   if (projtex(c, inst)) {
789      struct prog_dst_register tmp = get_temp(c);
790      struct prog_instruction tmp_inst;
791
792      /* tmp0.w = RCP inst.arg[0][3]
793       */
794      emit_op(c,
795	      OPCODE_RCP,
796	      dst_mask(tmp, WRITEMASK_W),
797	      0, 0, 0,
798	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
799	      src_undef(),
800	      src_undef());
801
802      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
803       */
804      emit_op(c,
805	      OPCODE_MUL,
806	      dst_mask(tmp, WRITEMASK_XYZ),
807	      0, 0, 0,
808	      src0,
809	      src_swizzle1(src_reg_from_dst(tmp), W),
810	      src_undef());
811
812      /* dst = precalc(TEX tmp0)
813       */
814      tmp_inst = *inst;
815      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
816      precalc_tex(c, &tmp_inst);
817
818      release_temp(c, tmp);
819   }
820   else
821   {
822      /* dst = precalc(TEX src0)
823       */
824      precalc_tex(c, inst);
825   }
826}
827
828
829
830static void emit_fb_write( struct brw_wm_compile *c )
831{
832   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
833   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
834   struct prog_src_register outcolor;
835   GLuint i;
836
837   struct prog_instruction *inst, *last_inst;
838   struct brw_context *brw = c->func.brw;
839
840   /* inst->Sampler is not used by backend,
841      use it for fb write target and eot */
842
843   if (brw->state.nr_draw_regions > 1) {
844       for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
845	   outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
846	   last_inst = inst = emit_op(c,
847		   WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
848		   outcolor, payload_r0_depth, outdepth);
849	   inst->Sampler = (i<<1);
850	   if (c->fp_fragcolor_emitted) {
851	       outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
852	       last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
853		       0, 0, 0, outcolor, payload_r0_depth, outdepth);
854	       inst->Sampler = (i<<1);
855	   }
856       }
857       last_inst->Sampler |= 1; //eot
858   }
859   else {
860      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
861      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
862         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
863      else
864         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
865
866       inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
867	       0, 0, 0, outcolor, payload_r0_depth, outdepth);
868       inst->Sampler = 1|(0<<1);
869   }
870}
871
872
873
874
875/***********************************************************************
876 * Emit INTERP instructions ahead of first use of each attrib.
877 */
878
879static void validate_src_regs( struct brw_wm_compile *c,
880			       const struct prog_instruction *inst )
881{
882   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
883   GLuint i;
884
885   for (i = 0; i < nr_args; i++) {
886      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
887	 GLuint idx = inst->SrcReg[i].Index;
888	 if (!(c->fp_interp_emitted & (1<<idx))) {
889	    emit_interp(c, idx);
890	 }
891      }
892   }
893}
894
895static void validate_dst_regs( struct brw_wm_compile *c,
896			       const struct prog_instruction *inst )
897{
898   if (inst->DstReg.File == PROGRAM_OUTPUT) {
899       GLuint idx = inst->DstReg.Index;
900       if (idx == FRAG_RESULT_COLR)
901	   c->fp_fragcolor_emitted = 1;
902   }
903}
904
905static void print_insns( const struct prog_instruction *insn,
906			 GLuint nr )
907{
908   GLuint i;
909   for (i = 0; i < nr; i++, insn++) {
910      _mesa_printf("%3d: ", i);
911      if (insn->Opcode < MAX_OPCODE)
912	 _mesa_print_instruction(insn);
913      else if (insn->Opcode < MAX_WM_OPCODE) {
914	 GLuint idx = insn->Opcode - MAX_OPCODE;
915
916	 _mesa_print_alu_instruction(insn,
917				     wm_opcode_strings[idx],
918				     3);
919      }
920      else
921	 _mesa_printf("UNKNOWN\n");
922
923   }
924}
925
926void brw_wm_pass_fp( struct brw_wm_compile *c )
927{
928   struct brw_fragment_program *fp = c->fp;
929   GLuint insn;
930
931   if (INTEL_DEBUG & DEBUG_WM) {
932      _mesa_printf("pre-fp:\n");
933      _mesa_print_program(&fp->program.Base);
934      _mesa_printf("\n");
935   }
936
937   c->pixel_xy = src_undef();
938   c->delta_xy = src_undef();
939   c->pixel_w = src_undef();
940   c->nr_fp_insns = 0;
941
942   /* Emit preamble instructions:
943    */
944
945
946   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
947      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
948      validate_src_regs(c, inst);
949      validate_dst_regs(c, inst);
950   }
951   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
952      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
953      struct prog_instruction *out;
954
955      /* Check for INPUT values, emit INTERP instructions where
956       * necessary:
957       */
958
959
960      switch (inst->Opcode) {
961      case OPCODE_SWZ:
962	 out = emit_insn(c, inst);
963	 out->Opcode = OPCODE_MOV;
964	 break;
965
966      case OPCODE_ABS:
967	 out = emit_insn(c, inst);
968	 out->Opcode = OPCODE_MOV;
969	 out->SrcReg[0].NegateBase = 0;
970	 out->SrcReg[0].Abs = 1;
971	 break;
972
973      case OPCODE_SUB:
974	 out = emit_insn(c, inst);
975	 out->Opcode = OPCODE_ADD;
976	 out->SrcReg[1].NegateBase ^= 0xf;
977	 break;
978
979      case OPCODE_SCS:
980	 out = emit_insn(c, inst);
981	 /* This should probably be done in the parser.
982	  */
983	 out->DstReg.WriteMask &= WRITEMASK_XY;
984	 break;
985
986      case OPCODE_DST:
987	 precalc_dst(c, inst);
988	 break;
989
990      case OPCODE_LIT:
991	 precalc_lit(c, inst);
992	 break;
993
994      case OPCODE_TEX:
995	 precalc_tex(c, inst);
996	 break;
997
998      case OPCODE_TXP:
999	 precalc_txp(c, inst);
1000	 break;
1001
1002      case OPCODE_TXB:
1003	 out = emit_insn(c, inst);
1004	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1005	 break;
1006
1007      case OPCODE_XPD:
1008	 out = emit_insn(c, inst);
1009	 /* This should probably be done in the parser.
1010	  */
1011	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1012	 break;
1013
1014      case OPCODE_KIL:
1015	 out = emit_insn(c, inst);
1016	 /* This should probably be done in the parser.
1017	  */
1018	 out->DstReg.WriteMask = 0;
1019	 break;
1020      case OPCODE_DDX:
1021	 emit_ddx(c, inst);
1022	 break;
1023      case OPCODE_DDY:
1024         emit_ddy(c, inst);
1025	break;
1026      case OPCODE_END:
1027	 emit_fb_write(c);
1028	 break;
1029      case OPCODE_PRINT:
1030	 break;
1031
1032      default:
1033	 emit_insn(c, inst);
1034	 break;
1035      }
1036   }
1037
1038   if (INTEL_DEBUG & DEBUG_WM) {
1039	   _mesa_printf("pass_fp:\n");
1040	   print_insns( c->prog_instructions, c->nr_fp_insns );
1041	   _mesa_printf("\n");
1042   }
1043}
1044
1045