brw_wm_fp.c revision 8ae7e7749b708fc5a46180d3de2503ba7e2ab1f3
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "shader/prog_parameter.h"
41#include "shader/prog_print.h"
42#include "shader/prog_statevars.h"
43
44
45#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
46
47#define X    0
48#define Y    1
49#define Z    2
50#define W    3
51
52
/* Printable names for the WM-specific pseudo-opcodes.
 *
 * print_insns() indexes this table with (Opcode - MAX_OPCODE) for any
 * opcode in the range [MAX_OPCODE, MAX_WM_OPCODE), so the entry order
 * matters.
 * NOTE(review): presumably the WM_* opcodes are numbered consecutively
 * from MAX_OPCODE in exactly this order - confirm against brw_wm.h.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",    /* index 0 */
   "DELTAXY",    /* index 1 */
   "PIXELW",     /* index 2 */
   "LINTERP",    /* index 3 */
   "PINTERP",    /* index 4 */
   "CINTERP",    /* index 5 */
   "WPOSXY",     /* index 6 */
   "FB_WRITE"    /* index 7 */
};
63
64#if 0
65static const char *wm_file_strings[] = {
66   "PAYLOAD"
67};
68#endif
69
70
71/***********************************************************************
72 * Source regs
73 */
74
75static struct prog_src_register src_reg(GLuint file, GLuint idx)
76{
77   struct prog_src_register reg;
78   reg.File = file;
79   reg.Index = idx;
80   reg.Swizzle = SWIZZLE_NOOP;
81   reg.RelAddr = 0;
82   reg.NegateBase = 0;
83   reg.Abs = 0;
84   reg.NegateAbs = 0;
85   return reg;
86}
87
88static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89{
90   return src_reg(dst.File, dst.Index);
91}
92
93static struct prog_src_register src_undef( void )
94{
95   return src_reg(PROGRAM_UNDEFINED, 0);
96}
97
98static GLboolean src_is_undef(struct prog_src_register src)
99{
100   return src.File == PROGRAM_UNDEFINED;
101}
102
103static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104{
105   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106   return reg;
107}
108
109static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110{
111   return src_swizzle(reg, x, x, x, x);
112}
113
114static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115{
116   reg.Swizzle = swizzle;
117   return reg;
118}
119
120
121/***********************************************************************
122 * Dest regs
123 */
124
125static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126{
127   struct prog_dst_register reg;
128   reg.File = file;
129   reg.Index = idx;
130   reg.WriteMask = WRITEMASK_XYZW;
131   reg.RelAddr = 0;
132   reg.CondMask = 0;
133   reg.CondSwizzle = 0;
134   reg.CondSrc = 0;
135   reg.pad = 0;
136   return reg;
137}
138
139static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140{
141   reg.WriteMask &= mask;
142   return reg;
143}
144
145static struct prog_dst_register dst_undef( void )
146{
147   return dst_reg(PROGRAM_UNDEFINED, 0);
148}
149
150
151
152static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153{
154   int bit = _mesa_ffs( ~c->fp_temp );
155
156   if (!bit) {
157      _mesa_printf("%s: out of temporaries\n", __FILE__);
158      exit(1);
159   }
160
161   c->fp_temp |= 1<<(bit-1);
162   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163}
164
165
166static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167{
168   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169}
170
171
172/***********************************************************************
173 * Instructions
174 */
175
176static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177{
178   return &c->prog_instructions[c->nr_fp_insns++];
179}
180
181static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182					const struct prog_instruction *inst0)
183{
184   struct prog_instruction *inst = get_fp_inst(c);
185   *inst = *inst0;
186   return inst;
187}
188
189static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
190				       GLuint op,
191				       struct prog_dst_register dest,
192				       GLuint saturate,
193				       GLuint tex_src_unit,
194				       GLuint tex_src_target,
195				       GLuint tex_shadow,
196				       struct prog_src_register src0,
197				       struct prog_src_register src1,
198				       struct prog_src_register src2 )
199{
200   struct prog_instruction *inst = get_fp_inst(c);
201
202   memset(inst, 0, sizeof(*inst));
203
204   inst->Opcode = op;
205   inst->DstReg = dest;
206   inst->SaturateMode = saturate;
207   inst->TexSrcUnit = tex_src_unit;
208   inst->TexSrcTarget = tex_src_target;
209   inst->TexShadow = tex_shadow;
210   inst->SrcReg[0] = src0;
211   inst->SrcReg[1] = src1;
212   inst->SrcReg[2] = src2;
213   return inst;
214}
215
216
217static struct prog_instruction * emit_op(struct brw_wm_compile *c,
218				       GLuint op,
219				       struct prog_dst_register dest,
220				       GLuint saturate,
221				       struct prog_src_register src0,
222				       struct prog_src_register src1,
223				       struct prog_src_register src2 )
224{
225   return emit_tex_op(c, op, dest, saturate,
226                      0, 0, 0,  /* tex unit, target, shadow */
227                      src0, src1, src2);
228}
229
230
231
232
233/***********************************************************************
234 * Special instructions for interpolation and other tasks
235 */
236
237static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
238{
239   if (src_is_undef(c->pixel_xy)) {
240      struct prog_dst_register pixel_xy = get_temp(c);
241      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
242
243
244      /* Emit the out calculations, and hold onto the results.  Use
245       * two instructions as a temporary is required.
246       */
247      /* pixel_xy.xy = PIXELXY payload[0];
248       */
249      emit_op(c,
250	      WM_PIXELXY,
251	      dst_mask(pixel_xy, WRITEMASK_XY),
252	      0,
253	      payload_r0_depth,
254	      src_undef(),
255	      src_undef());
256
257      c->pixel_xy = src_reg_from_dst(pixel_xy);
258   }
259
260   return c->pixel_xy;
261}
262
263static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
264{
265   if (src_is_undef(c->delta_xy)) {
266      struct prog_dst_register delta_xy = get_temp(c);
267      struct prog_src_register pixel_xy = get_pixel_xy(c);
268      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
269
270      /* deltas.xy = DELTAXY pixel_xy, payload[0]
271       */
272      emit_op(c,
273	      WM_DELTAXY,
274	      dst_mask(delta_xy, WRITEMASK_XY),
275	      0,
276	      pixel_xy,
277	      payload_r0_depth,
278	      src_undef());
279
280      c->delta_xy = src_reg_from_dst(delta_xy);
281   }
282
283   return c->delta_xy;
284}
285
286static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
287{
288   if (src_is_undef(c->pixel_w)) {
289      struct prog_dst_register pixel_w = get_temp(c);
290      struct prog_src_register deltas = get_delta_xy(c);
291      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
292
293      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
294       */
295      emit_op(c,
296	      WM_PIXELW,
297	      dst_mask(pixel_w, WRITEMASK_W),
298	      0,
299	      interp_wpos,
300	      deltas,
301	      src_undef());
302
303
304      c->pixel_w = src_reg_from_dst(pixel_w);
305   }
306
307   return c->pixel_w;
308}
309
310static void emit_interp( struct brw_wm_compile *c,
311			 GLuint idx )
312{
313   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
314   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
315   struct prog_src_register deltas = get_delta_xy(c);
316   struct prog_src_register arg2;
317   GLuint opcode;
318
319   /* Need to use PINTERP on attributes which have been
320    * multiplied by 1/W in the SF program, and LINTERP on those
321    * which have not:
322    */
323   switch (idx) {
324   case FRAG_ATTRIB_WPOS:
325      opcode = WM_LINTERP;
326      arg2 = src_undef();
327
328      /* Have to treat wpos.xy specially:
329       */
330      emit_op(c,
331	      WM_WPOSXY,
332	      dst_mask(dst, WRITEMASK_XY),
333	      0,
334	      get_pixel_xy(c),
335	      src_undef(),
336	      src_undef());
337
338      dst = dst_mask(dst, WRITEMASK_ZW);
339
340      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
341       */
342      emit_op(c,
343	      WM_LINTERP,
344	      dst,
345	      0,
346	      interp,
347	      deltas,
348	      arg2);
349      break;
350   case FRAG_ATTRIB_COL0:
351   case FRAG_ATTRIB_COL1:
352      if (c->key.flat_shade) {
353	 emit_op(c,
354		 WM_CINTERP,
355		 dst,
356		 0,
357		 interp,
358		 src_undef(),
359		 src_undef());
360      }
361      else {
362	 emit_op(c,
363		 WM_LINTERP,
364		 dst,
365		 0,
366		 interp,
367		 deltas,
368		 src_undef());
369      }
370      break;
371   default:
372      emit_op(c,
373	      WM_PINTERP,
374	      dst,
375	      0,
376	      interp,
377	      deltas,
378	      get_pixel_w(c));
379      break;
380   }
381
382   c->fp_interp_emitted |= 1<<idx;
383}
384
385static void emit_ddx( struct brw_wm_compile *c,
386        const struct prog_instruction *inst )
387{
388    GLuint idx = inst->SrcReg[0].Index;
389    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
390
391    c->fp_deriv_emitted |= 1<<idx;
392    emit_op(c,
393            OPCODE_DDX,
394            inst->DstReg,
395            0,
396            interp,
397            get_pixel_w(c),
398            src_undef());
399}
400
401static void emit_ddy( struct brw_wm_compile *c,
402        const struct prog_instruction *inst )
403{
404    GLuint idx = inst->SrcReg[0].Index;
405    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
406
407    c->fp_deriv_emitted |= 1<<idx;
408    emit_op(c,
409            OPCODE_DDY,
410            inst->DstReg,
411            0,
412            interp,
413            get_pixel_w(c),
414            src_undef());
415}
416
417/***********************************************************************
418 * Hacks to extend the program parameter and constant lists.
419 */
420
421/* Add the fog parameters to the parameter list of the original
422 * program, rather than creating a new list.  Doesn't really do any
423 * harm and it's not as if the parameter handling isn't a big hack
424 * anyway.
425 */
426static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
427                                                     GLint s0,
428                                                     GLint s1,
429                                                     GLint s2,
430                                                     GLint s3,
431                                                     GLint s4)
432{
433   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
434   gl_state_index tokens[STATE_LENGTH];
435   GLuint idx;
436   tokens[0] = s0;
437   tokens[1] = s1;
438   tokens[2] = s2;
439   tokens[3] = s3;
440   tokens[4] = s4;
441
442   for (idx = 0; idx < paramList->NumParameters; idx++) {
443      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
444	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
445	 return src_reg(PROGRAM_STATE_VAR, idx);
446   }
447
448   idx = _mesa_add_state_reference( paramList, tokens );
449
450   return src_reg(PROGRAM_STATE_VAR, idx);
451}
452
453
454static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
455						     GLfloat s0,
456						     GLfloat s1,
457						     GLfloat s2,
458						     GLfloat s3)
459{
460   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
461   GLfloat values[4];
462   GLuint idx;
463   GLuint swizzle;
464
465   values[0] = s0;
466   values[1] = s1;
467   values[2] = s2;
468   values[3] = s3;
469
470   /* Have to search, otherwise multiple compilations will each grow
471    * the parameter list.
472    */
473   for (idx = 0; idx < paramList->NumParameters; idx++) {
474      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
475	  memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
476
477	 /* XXX: this mimics the mesa bug which puts all constants and
478	  * parameters into the "PROGRAM_STATE_VAR" category:
479	  */
480	 return src_reg(PROGRAM_STATE_VAR, idx);
481   }
482
483   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
484   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
485   return src_reg(PROGRAM_STATE_VAR, idx);
486}
487
488
489
490/***********************************************************************
491 * Expand various instructions here to simpler forms.
492 */
493static void precalc_dst( struct brw_wm_compile *c,
494			       const struct prog_instruction *inst )
495{
496   struct prog_src_register src0 = inst->SrcReg[0];
497   struct prog_src_register src1 = inst->SrcReg[1];
498   struct prog_dst_register dst = inst->DstReg;
499
500   if (dst.WriteMask & WRITEMASK_Y) {
501      /* dst.y = mul src0.y, src1.y
502       */
503      emit_op(c,
504	      OPCODE_MUL,
505	      dst_mask(dst, WRITEMASK_Y),
506	      inst->SaturateMode,
507	      src0,
508	      src1,
509	      src_undef());
510   }
511
512   if (dst.WriteMask & WRITEMASK_XZ) {
513      struct prog_instruction *swz;
514      GLuint z = GET_SWZ(src0.Swizzle, Z);
515
516      /* dst.xz = swz src0.1zzz
517       */
518      swz = emit_op(c,
519		    OPCODE_SWZ,
520		    dst_mask(dst, WRITEMASK_XZ),
521		    inst->SaturateMode,
522		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
523		    src_undef(),
524		    src_undef());
525      /* Avoid letting negation flag of src0 affect our 1 constant. */
526      swz->SrcReg[0].NegateBase &= ~NEGATE_X;
527   }
528   if (dst.WriteMask & WRITEMASK_W) {
529      /* dst.w = mov src1.w
530       */
531      emit_op(c,
532	      OPCODE_MOV,
533	      dst_mask(dst, WRITEMASK_W),
534	      inst->SaturateMode,
535	      src1,
536	      src_undef(),
537	      src_undef());
538   }
539}
540
541
542static void precalc_lit( struct brw_wm_compile *c,
543			 const struct prog_instruction *inst )
544{
545   struct prog_src_register src0 = inst->SrcReg[0];
546   struct prog_dst_register dst = inst->DstReg;
547
548   if (dst.WriteMask & WRITEMASK_XW) {
549      struct prog_instruction *swz;
550
551      /* dst.xw = swz src0.1111
552       */
553      swz = emit_op(c,
554		    OPCODE_SWZ,
555		    dst_mask(dst, WRITEMASK_XW),
556		    0,
557		    src_swizzle1(src0, SWIZZLE_ONE),
558		    src_undef(),
559		    src_undef());
560      /* Avoid letting the negation flag of src0 affect our 1 constant. */
561      swz->SrcReg[0].NegateBase = 0;
562   }
563
564   if (dst.WriteMask & WRITEMASK_YZ) {
565      emit_op(c,
566	      OPCODE_LIT,
567	      dst_mask(dst, WRITEMASK_YZ),
568	      inst->SaturateMode,
569	      src0,
570	      src_undef(),
571	      src_undef());
572   }
573}
574
575
576/**
577 * Some TEX instructions require extra code, cube map coordinate
578 * normalization, or coordinate scaling for RECT textures, etc.
579 * This function emits those extra instructions and the TEX
580 * instruction itself.
581 */
582static void precalc_tex( struct brw_wm_compile *c,
583			 const struct prog_instruction *inst )
584{
585   struct prog_src_register coord;
586   struct prog_dst_register tmpcoord;
587   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
588
589   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
590       struct prog_instruction *out;
591       struct prog_dst_register tmp0 = get_temp(c);
592       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
593       struct prog_dst_register tmp1 = get_temp(c);
594       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
595       struct prog_src_register src0 = inst->SrcReg[0];
596
597       /* find longest component of coord vector and normalize it */
598       tmpcoord = get_temp(c);
599       coord = src_reg_from_dst(tmpcoord);
600
601       /* tmpcoord = src0 (i.e.: coord = src0) */
602       out = emit_op(c, OPCODE_MOV,
603                     tmpcoord,
604                     0,
605                     src0,
606                     src_undef(),
607                     src_undef());
608       out->SrcReg[0].NegateBase = 0;
609       out->SrcReg[0].Abs = 1;
610
611       /* tmp0 = MAX(coord.X, coord.Y) */
612       emit_op(c, OPCODE_MAX,
613               tmp0,
614               0,
615               src_swizzle1(coord, X),
616               src_swizzle1(coord, Y),
617               src_undef());
618
619       /* tmp1 = MAX(tmp0, coord.Z) */
620       emit_op(c, OPCODE_MAX,
621               tmp1,
622               0,
623               tmp0src,
624               src_swizzle1(coord, Z),
625               src_undef());
626
627       /* tmp0 = 1 / tmp1 */
628       emit_op(c, OPCODE_RCP,
629               tmp0,
630               0,
631               tmp1src,
632               src_undef(),
633               src_undef());
634
635       /* tmpCoord = src0 * tmp0 */
636       emit_op(c, OPCODE_MUL,
637               tmpcoord,
638               0,
639               src0,
640               tmp0src,
641               src_undef());
642
643       release_temp(c, tmp0);
644       release_temp(c, tmp1);
645   }
646   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
647      struct prog_src_register scale =
648	 search_or_add_param5( c,
649			       STATE_INTERNAL,
650			       STATE_TEXRECT_SCALE,
651			       unit,
652			       0,0 );
653
654      tmpcoord = get_temp(c);
655
656      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
657       */
658      emit_op(c,
659	      OPCODE_MUL,
660	      tmpcoord,
661	      0,
662	      inst->SrcReg[0],
663	      scale,
664	      src_undef());
665
666      coord = src_reg_from_dst(tmpcoord);
667   }
668   else {
669      coord = inst->SrcReg[0];
670   }
671
672   /* Need to emit YUV texture conversions by hand.  Probably need to
673    * do this here - the alternative is in brw_wm_emit.c, but the
674    * conversion requires allocating a temporary variable which we
675    * don't have the facility to do that late in the compilation.
676    */
677   if (c->key.yuvtex_mask & (1 << unit)) {
678      /* convert ycbcr to RGBA */
679      GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
680
681      /*
682	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
683	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
684	 UYV     = TEX ...
685	 UYV.xyz = ADD UYV,     C0
686	 UYV.y   = MUL UYV.y,   C0.w
687 	 if (UV swaped)
688	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
689	 else
690	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
691	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
692      */
693      struct prog_dst_register dst = inst->DstReg;
694      struct prog_dst_register tmp = get_temp(c);
695      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
696      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
697      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
698
699      /* tmp     = TEX ...
700       */
701      emit_tex_op(c,
702                  OPCODE_TEX,
703                  tmp,
704                  inst->SaturateMode,
705                  unit,
706                  inst->TexSrcTarget,
707                  inst->TexShadow,
708                  coord,
709                  src_undef(),
710                  src_undef());
711
712      /* tmp.xyz =  ADD TMP, C0
713       */
714      emit_op(c,
715	      OPCODE_ADD,
716	      dst_mask(tmp, WRITEMASK_XYZ),
717	      0,
718	      tmpsrc,
719	      C0,
720	      src_undef());
721
722      /* YUV.y   = MUL YUV.y, C0.w
723       */
724
725      emit_op(c,
726	      OPCODE_MUL,
727	      dst_mask(tmp, WRITEMASK_Y),
728	      0,
729	      tmpsrc,
730	      src_swizzle1(C0, W),
731	      src_undef());
732
733      /*
734       * if (UV swaped)
735       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
736       * else
737       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
738       */
739
740      emit_op(c,
741	      OPCODE_MAD,
742	      dst_mask(dst, WRITEMASK_XYZ),
743	      0,
744	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
745	      C1,
746	      src_swizzle1(tmpsrc, Y));
747
748      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
749       */
750      emit_op(c,
751	      OPCODE_MAD,
752	      dst_mask(dst, WRITEMASK_Y),
753	      0,
754	      src_swizzle1(tmpsrc, Z),
755	      src_swizzle1(C1, W),
756	      src_swizzle1(src_reg_from_dst(dst), Y));
757
758      release_temp(c, tmp);
759   }
760   else {
761      /* ordinary RGBA tex instruction */
762      emit_tex_op(c,
763                  OPCODE_TEX,
764                  inst->DstReg,
765                  inst->SaturateMode,
766                  unit,
767                  inst->TexSrcTarget,
768                  inst->TexShadow,
769                  coord,
770                  src_undef(),
771                  src_undef());
772   }
773
774   /* For GL_EXT_texture_swizzle: */
775   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
776      /* swizzle the result of the TEX instruction */
777      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
778      emit_op(c, OPCODE_SWZ,
779              inst->DstReg,
780              SATURATE_OFF, /* saturate already done above */
781              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
782              src_undef(),
783              src_undef());
784   }
785
786   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
787       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
788      release_temp(c, tmpcoord);
789}
790
791
792static GLboolean projtex( struct brw_wm_compile *c,
793			  const struct prog_instruction *inst )
794{
795   struct prog_src_register src = inst->SrcReg[0];
796
797   /* Only try to detect the simplest cases.  Could detect (later)
798    * cases where we are trying to emit code like RCP {1.0}, MUL x,
799    * {1.0}, and so on.
800    *
801    * More complex cases than this typically only arise from
802    * user-provided fragment programs anyway:
803    */
804   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
805      return 0;  /* ut2004 gun rendering !?! */
806   else if (src.File == PROGRAM_INPUT &&
807	    GET_SWZ(src.Swizzle, W) == W &&
808           (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
809      return 0;
810   else
811      return 1;
812}
813
814
815static void precalc_txp( struct brw_wm_compile *c,
816			       const struct prog_instruction *inst )
817{
818   struct prog_src_register src0 = inst->SrcReg[0];
819
820   if (projtex(c, inst)) {
821      struct prog_dst_register tmp = get_temp(c);
822      struct prog_instruction tmp_inst;
823
824      /* tmp0.w = RCP inst.arg[0][3]
825       */
826      emit_op(c,
827	      OPCODE_RCP,
828	      dst_mask(tmp, WRITEMASK_W),
829	      0,
830	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
831	      src_undef(),
832	      src_undef());
833
834      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
835       */
836      emit_op(c,
837	      OPCODE_MUL,
838	      dst_mask(tmp, WRITEMASK_XYZ),
839	      0,
840	      src0,
841	      src_swizzle1(src_reg_from_dst(tmp), W),
842	      src_undef());
843
844      /* dst = precalc(TEX tmp0)
845       */
846      tmp_inst = *inst;
847      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
848      precalc_tex(c, &tmp_inst);
849
850      release_temp(c, tmp);
851   }
852   else
853   {
854      /* dst = precalc(TEX src0)
855       */
856      precalc_tex(c, inst);
857   }
858}
859
860
861
862static void emit_fb_write( struct brw_wm_compile *c )
863{
864   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
865   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
866   struct prog_src_register outcolor;
867   GLuint i;
868
869   struct prog_instruction *inst, *last_inst;
870   struct brw_context *brw = c->func.brw;
871
872   /* The inst->Aux field is used for FB write target and the EOT marker */
873
874   if (brw->state.nr_draw_regions > 1) {
875      for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
876         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
877         last_inst = inst = emit_op(c,
878                                    WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
879                                    outcolor, payload_r0_depth, outdepth);
880         inst->Aux = (i<<1);
881         if (c->fp_fragcolor_emitted) {
882            outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
883            last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
884                                       0, outcolor, payload_r0_depth, outdepth);
885            inst->Aux = (i<<1);
886         }
887      }
888      last_inst->Aux |= 1; //eot
889   }
890   else {
891      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
892      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
893         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
894      else
895         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
896
897      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
898                     0, outcolor, payload_r0_depth, outdepth);
899      inst->Aux = 1|(0<<1);
900   }
901}
902
903
904
905
906/***********************************************************************
907 * Emit INTERP instructions ahead of first use of each attrib.
908 */
909
910static void validate_src_regs( struct brw_wm_compile *c,
911			       const struct prog_instruction *inst )
912{
913   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
914   GLuint i;
915
916   for (i = 0; i < nr_args; i++) {
917      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
918	 GLuint idx = inst->SrcReg[i].Index;
919	 if (!(c->fp_interp_emitted & (1<<idx))) {
920	    emit_interp(c, idx);
921	 }
922      }
923   }
924}
925
926static void validate_dst_regs( struct brw_wm_compile *c,
927			       const struct prog_instruction *inst )
928{
929   if (inst->DstReg.File == PROGRAM_OUTPUT) {
930      GLuint idx = inst->DstReg.Index;
931      if (idx == FRAG_RESULT_COLR)
932         c->fp_fragcolor_emitted = 1;
933   }
934}
935
936static void print_insns( const struct prog_instruction *insn,
937			 GLuint nr )
938{
939   GLuint i;
940   for (i = 0; i < nr; i++, insn++) {
941      _mesa_printf("%3d: ", i);
942      if (insn->Opcode < MAX_OPCODE)
943	 _mesa_print_instruction(insn);
944      else if (insn->Opcode < MAX_WM_OPCODE) {
945	 GLuint idx = insn->Opcode - MAX_OPCODE;
946
947	 _mesa_print_alu_instruction(insn,
948				     wm_opcode_strings[idx],
949				     3);
950      }
951      else
952	 _mesa_printf("UNKNOWN\n");
953   }
954}
955
956
957/**
958 * Initial pass for fragment program code generation.
959 * This function is used by both the GLSL and non-GLSL paths.
960 */
961void brw_wm_pass_fp( struct brw_wm_compile *c )
962{
963   struct brw_fragment_program *fp = c->fp;
964   GLuint insn;
965
966   if (INTEL_DEBUG & DEBUG_WM) {
967      _mesa_printf("pre-fp:\n");
968      _mesa_print_program(&fp->program.Base);
969      _mesa_printf("\n");
970   }
971
972   c->pixel_xy = src_undef();
973   c->delta_xy = src_undef();
974   c->pixel_w = src_undef();
975   c->nr_fp_insns = 0;
976
977   /* Emit preamble instructions.  This is where special instructions such as
978    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
979    * compute shader inputs from varying vars.
980    */
981   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
982      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
983      validate_src_regs(c, inst);
984      validate_dst_regs(c, inst);
985   }
986
987   /* Loop over all instructions doing assorted simplifications and
988    * transformations.
989    */
990   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
991      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
992      struct prog_instruction *out;
993
994      /* Check for INPUT values, emit INTERP instructions where
995       * necessary:
996       */
997
998      switch (inst->Opcode) {
999      case OPCODE_SWZ:
1000	 out = emit_insn(c, inst);
1001	 out->Opcode = OPCODE_MOV;
1002	 break;
1003
1004      case OPCODE_ABS:
1005	 out = emit_insn(c, inst);
1006	 out->Opcode = OPCODE_MOV;
1007	 out->SrcReg[0].NegateBase = 0;
1008	 out->SrcReg[0].Abs = 1;
1009	 break;
1010
1011      case OPCODE_SUB:
1012	 out = emit_insn(c, inst);
1013	 out->Opcode = OPCODE_ADD;
1014	 out->SrcReg[1].NegateBase ^= 0xf;
1015	 break;
1016
1017      case OPCODE_SCS:
1018	 out = emit_insn(c, inst);
1019	 /* This should probably be done in the parser.
1020	  */
1021	 out->DstReg.WriteMask &= WRITEMASK_XY;
1022	 break;
1023
1024      case OPCODE_DST:
1025	 precalc_dst(c, inst);
1026	 break;
1027
1028      case OPCODE_LIT:
1029	 precalc_lit(c, inst);
1030	 break;
1031
1032      case OPCODE_TEX:
1033	 precalc_tex(c, inst);
1034	 break;
1035
1036      case OPCODE_TXP:
1037	 precalc_txp(c, inst);
1038	 break;
1039
1040      case OPCODE_TXB:
1041	 out = emit_insn(c, inst);
1042	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1043	 break;
1044
1045      case OPCODE_XPD:
1046	 out = emit_insn(c, inst);
1047	 /* This should probably be done in the parser.
1048	  */
1049	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1050	 break;
1051
1052      case OPCODE_KIL:
1053	 out = emit_insn(c, inst);
1054	 /* This should probably be done in the parser.
1055	  */
1056	 out->DstReg.WriteMask = 0;
1057	 break;
1058      case OPCODE_DDX:
1059	 emit_ddx(c, inst);
1060	 break;
1061      case OPCODE_DDY:
1062         emit_ddy(c, inst);
1063	break;
1064      case OPCODE_END:
1065	 emit_fb_write(c);
1066	 break;
1067      case OPCODE_PRINT:
1068	 break;
1069
1070      default:
1071	 emit_insn(c, inst);
1072	 break;
1073      }
1074   }
1075
1076   if (INTEL_DEBUG & DEBUG_WM) {
1077      _mesa_printf("pass_fp:\n");
1078      print_insns( c->prog_instructions, c->nr_fp_insns );
1079      _mesa_printf("\n");
1080   }
1081}
1082
1083