brw_wm_fp.c revision f147599ef4b0d14c25a7e0d3f9f1c9b0229bb6fc
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "program/prog_parameter.h"
41#include "program/prog_print.h"
42#include "program/prog_statevars.h"
43
44
45/** An invalid texture target */
46#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
47
48/** An invalid texture unit */
49#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
50
51#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
52
53#define X    0
54#define Y    1
55#define Z    2
56#define W    3
57
58
/**
 * Printable names for the WM-specific pseudo-opcodes.
 *
 * print_insns() indexes this table with (Opcode - MAX_OPCODE), so the
 * entries are presumably expected to follow the order of the WM_* opcode
 * values -- confirm against the opcode definitions when adding one.
 *
 * Both the strings and the table itself are read-only.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

/* Register-file names; compiled out, nothing in this file uses the table. */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78/***********************************************************************
79 * Source regs
80 */
81
82static struct prog_src_register src_reg(GLuint file, GLuint idx)
83{
84   struct prog_src_register reg;
85   reg.File = file;
86   reg.Index = idx;
87   reg.Swizzle = SWIZZLE_NOOP;
88   reg.RelAddr = 0;
89   reg.Negate = NEGATE_NONE;
90   reg.Abs = 0;
91   reg.HasIndex2 = 0;
92   reg.RelAddr2 = 0;
93   reg.Index2 = 0;
94   return reg;
95}
96
97static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98{
99   return src_reg(dst.File, dst.Index);
100}
101
102static struct prog_src_register src_undef( void )
103{
104   return src_reg(PROGRAM_UNDEFINED, 0);
105}
106
107static GLboolean src_is_undef(struct prog_src_register src)
108{
109   return src.File == PROGRAM_UNDEFINED;
110}
111
112static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113{
114   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115   return reg;
116}
117
118static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119{
120   return src_swizzle(reg, x, x, x, x);
121}
122
123static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124{
125   reg.Swizzle = swizzle;
126   return reg;
127}
128
129
130/***********************************************************************
131 * Dest regs
132 */
133
134static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135{
136   struct prog_dst_register reg;
137   reg.File = file;
138   reg.Index = idx;
139   reg.WriteMask = WRITEMASK_XYZW;
140   reg.RelAddr = 0;
141   reg.CondMask = COND_TR;
142   reg.CondSwizzle = 0;
143   reg.CondSrc = 0;
144   return reg;
145}
146
147static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148{
149   reg.WriteMask &= mask;
150   return reg;
151}
152
153static struct prog_dst_register dst_undef( void )
154{
155   return dst_reg(PROGRAM_UNDEFINED, 0);
156}
157
158
159
160static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161{
162   int bit = _mesa_ffs( ~c->fp_temp );
163
164   if (!bit) {
165      printf("%s: out of temporaries\n", __FILE__);
166      exit(1);
167   }
168
169   c->fp_temp |= 1<<(bit-1);
170   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171}
172
173
174static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175{
176   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177}
178
179
180/***********************************************************************
181 * Instructions
182 */
183
184static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185{
186   assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187   memset(&c->prog_instructions[c->nr_fp_insns], 0,
188	  sizeof(*c->prog_instructions));
189   return &c->prog_instructions[c->nr_fp_insns++];
190}
191
192static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193					const struct prog_instruction *inst0)
194{
195   struct prog_instruction *inst = get_fp_inst(c);
196   *inst = *inst0;
197   return inst;
198}
199
200static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201				       GLuint op,
202				       struct prog_dst_register dest,
203				       GLuint saturate,
204				       GLuint tex_src_unit,
205				       GLuint tex_src_target,
206				       GLuint tex_shadow,
207				       struct prog_src_register src0,
208				       struct prog_src_register src1,
209				       struct prog_src_register src2 )
210{
211   struct prog_instruction *inst = get_fp_inst(c);
212
213   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214          tex_src_unit == TEX_UNIT_NONE);
215   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216          tex_src_target == TEX_TARGET_NONE);
217
218   /* update mask of which texture units are referenced by this program */
219   if (tex_src_unit != TEX_UNIT_NONE)
220      c->fp->tex_units_used |= (1 << tex_src_unit);
221
222   memset(inst, 0, sizeof(*inst));
223
224   inst->Opcode = op;
225   inst->DstReg = dest;
226   inst->SaturateMode = saturate;
227   inst->TexSrcUnit = tex_src_unit;
228   inst->TexSrcTarget = tex_src_target;
229   inst->TexShadow = tex_shadow;
230   inst->SrcReg[0] = src0;
231   inst->SrcReg[1] = src1;
232   inst->SrcReg[2] = src2;
233   return inst;
234}
235
236
237static struct prog_instruction * emit_op(struct brw_wm_compile *c,
238				       GLuint op,
239				       struct prog_dst_register dest,
240				       GLuint saturate,
241				       struct prog_src_register src0,
242				       struct prog_src_register src1,
243				       struct prog_src_register src2 )
244{
245   return emit_tex_op(c, op, dest, saturate,
246                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
247                      src0, src1, src2);
248}
249
250
251/* Many Mesa opcodes produce the same value across all the result channels.
252 * We'd rather not have to support that splatting in the opcode implementations,
253 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
254 * anyway.  We can easily get both by emitting the opcode to one channel, and
255 * then MOVing it to the others, which brw_wm_pass*.c already understands.
256 */
257static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
258						 const struct prog_instruction *inst0)
259{
260   struct prog_instruction *inst;
261   unsigned int dst_chan;
262   unsigned int other_channel_mask;
263
264   if (inst0->DstReg.WriteMask == 0)
265      return NULL;
266
267   dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
268   inst = get_fp_inst(c);
269   *inst = *inst0;
270   inst->DstReg.WriteMask = 1 << dst_chan;
271
272   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
273   if (other_channel_mask != 0) {
274      inst = emit_op(c,
275		     OPCODE_MOV,
276		     dst_mask(inst0->DstReg, other_channel_mask),
277		     0,
278		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
279		     src_undef(),
280		     src_undef());
281   }
282   return inst;
283}
284
285
286/***********************************************************************
287 * Special instructions for interpolation and other tasks
288 */
289
290static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
291{
292   if (src_is_undef(c->pixel_xy)) {
293      struct prog_dst_register pixel_xy = get_temp(c);
294      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
295
296
297      /* Emit the out calculations, and hold onto the results.  Use
298       * two instructions as a temporary is required.
299       */
300      /* pixel_xy.xy = PIXELXY payload[0];
301       */
302      emit_op(c,
303	      WM_PIXELXY,
304	      dst_mask(pixel_xy, WRITEMASK_XY),
305	      0,
306	      payload_r0_depth,
307	      src_undef(),
308	      src_undef());
309
310      c->pixel_xy = src_reg_from_dst(pixel_xy);
311   }
312
313   return c->pixel_xy;
314}
315
316static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
317{
318   if (src_is_undef(c->delta_xy)) {
319      struct prog_dst_register delta_xy = get_temp(c);
320      struct prog_src_register pixel_xy = get_pixel_xy(c);
321      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
322
323      /* deltas.xy = DELTAXY pixel_xy, payload[0]
324       */
325      emit_op(c,
326	      WM_DELTAXY,
327	      dst_mask(delta_xy, WRITEMASK_XY),
328	      0,
329	      pixel_xy,
330	      payload_r0_depth,
331	      src_undef());
332
333      c->delta_xy = src_reg_from_dst(delta_xy);
334   }
335
336   return c->delta_xy;
337}
338
339static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
340{
341   /* This is called for producing 1/w in pre-gen6 interp.  for gen6,
342    * the interp opcodes don't use this argument.  But to keep the
343    * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
344    * into the slot.
345    */
346   if (c->func.brw->intel.gen >= 6)
347      return c->delta_xy;
348
349   if (src_is_undef(c->pixel_w)) {
350      struct prog_dst_register pixel_w = get_temp(c);
351      struct prog_src_register deltas = get_delta_xy(c);
352      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
353
354      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
355       */
356      emit_op(c,
357	      WM_PIXELW,
358	      dst_mask(pixel_w, WRITEMASK_W),
359	      0,
360	      interp_wpos,
361	      deltas,
362	      src_undef());
363
364
365      c->pixel_w = src_reg_from_dst(pixel_w);
366   }
367
368   return c->pixel_w;
369}
370
371static void emit_interp( struct brw_wm_compile *c,
372			 GLuint idx )
373{
374   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
375   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
376   struct prog_src_register deltas;
377
378   deltas = get_delta_xy(c);
379
380   /* Need to use PINTERP on attributes which have been
381    * multiplied by 1/W in the SF program, and LINTERP on those
382    * which have not:
383    */
384   switch (idx) {
385   case FRAG_ATTRIB_WPOS:
386      /* Have to treat wpos.xy specially:
387       */
388      emit_op(c,
389	      WM_WPOSXY,
390	      dst_mask(dst, WRITEMASK_XY),
391	      0,
392	      get_pixel_xy(c),
393	      src_undef(),
394	      src_undef());
395
396      dst = dst_mask(dst, WRITEMASK_ZW);
397
398      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
399       */
400      emit_op(c,
401	      WM_LINTERP,
402	      dst,
403	      0,
404	      interp,
405	      deltas,
406	      src_undef());
407      break;
408   case FRAG_ATTRIB_COL0:
409   case FRAG_ATTRIB_COL1:
410      if (c->key.flat_shade) {
411	 emit_op(c,
412		 WM_CINTERP,
413		 dst,
414		 0,
415		 interp,
416		 src_undef(),
417		 src_undef());
418      }
419      else {
420	 /* perspective-corrected color interpolation */
421	 emit_op(c,
422		 WM_PINTERP,
423		 dst,
424		 0,
425		 interp,
426		 deltas,
427		 get_pixel_w(c));
428      }
429      break;
430   case FRAG_ATTRIB_FOGC:
431      /* Interpolate the fog coordinate */
432      emit_op(c,
433	      WM_PINTERP,
434	      dst_mask(dst, WRITEMASK_X),
435	      0,
436	      interp,
437	      deltas,
438	      get_pixel_w(c));
439
440      emit_op(c,
441	      OPCODE_MOV,
442	      dst_mask(dst, WRITEMASK_YZW),
443	      0,
444	      src_swizzle(interp,
445			  SWIZZLE_ZERO,
446			  SWIZZLE_ZERO,
447			  SWIZZLE_ZERO,
448			  SWIZZLE_ONE),
449	      src_undef(),
450	      src_undef());
451      break;
452
453   case FRAG_ATTRIB_FACE:
454      emit_op(c,
455              WM_FRONTFACING,
456              dst_mask(dst, WRITEMASK_X),
457              0,
458              src_undef(),
459              src_undef(),
460              src_undef());
461      break;
462
463   case FRAG_ATTRIB_PNTC:
464      /* XXX review/test this case */
465      emit_op(c,
466	      WM_PINTERP,
467	      dst_mask(dst, WRITEMASK_XY),
468	      0,
469	      interp,
470	      deltas,
471	      get_pixel_w(c));
472
473      emit_op(c,
474	      OPCODE_MOV,
475	      dst_mask(dst, WRITEMASK_ZW),
476	      0,
477	      src_swizzle(interp,
478			  SWIZZLE_ZERO,
479			  SWIZZLE_ZERO,
480			  SWIZZLE_ZERO,
481			  SWIZZLE_ONE),
482	      src_undef(),
483	      src_undef());
484      break;
485
486   default:
487      emit_op(c,
488	      WM_PINTERP,
489	      dst,
490	      0,
491	      interp,
492	      deltas,
493	      get_pixel_w(c));
494      break;
495   }
496
497   c->fp_interp_emitted |= 1<<idx;
498}
499
500/***********************************************************************
501 * Hacks to extend the program parameter and constant lists.
502 */
503
504/* Add the fog parameters to the parameter list of the original
505 * program, rather than creating a new list.  Doesn't really do any
506 * harm and it's not as if the parameter handling isn't a big hack
507 * anyway.
508 */
509static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
510                                                     GLint s0,
511                                                     GLint s1,
512                                                     GLint s2,
513                                                     GLint s3,
514                                                     GLint s4)
515{
516   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
517   gl_state_index tokens[STATE_LENGTH];
518   GLuint idx;
519   tokens[0] = s0;
520   tokens[1] = s1;
521   tokens[2] = s2;
522   tokens[3] = s3;
523   tokens[4] = s4;
524
525   idx = _mesa_add_state_reference( paramList, tokens );
526
527   return src_reg(PROGRAM_STATE_VAR, idx);
528}
529
530
531static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
532						     GLfloat s0,
533						     GLfloat s1,
534						     GLfloat s2,
535						     GLfloat s3)
536{
537   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
538   GLfloat values[4];
539   GLuint idx;
540   GLuint swizzle;
541   struct prog_src_register reg;
542
543   values[0] = s0;
544   values[1] = s1;
545   values[2] = s2;
546   values[3] = s3;
547
548   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
549   reg = src_reg(PROGRAM_STATE_VAR, idx);
550   reg.Swizzle = swizzle;
551
552   return reg;
553}
554
555
556
557/***********************************************************************
558 * Expand various instructions here to simpler forms.
559 */
560static void precalc_dst( struct brw_wm_compile *c,
561			       const struct prog_instruction *inst )
562{
563   struct prog_src_register src0 = inst->SrcReg[0];
564   struct prog_src_register src1 = inst->SrcReg[1];
565   struct prog_dst_register dst = inst->DstReg;
566
567   if (dst.WriteMask & WRITEMASK_Y) {
568      /* dst.y = mul src0.y, src1.y
569       */
570      emit_op(c,
571	      OPCODE_MUL,
572	      dst_mask(dst, WRITEMASK_Y),
573	      inst->SaturateMode,
574	      src0,
575	      src1,
576	      src_undef());
577   }
578
579   if (dst.WriteMask & WRITEMASK_XZ) {
580      struct prog_instruction *swz;
581      GLuint z = GET_SWZ(src0.Swizzle, Z);
582
583      /* dst.xz = swz src0.1zzz
584       */
585      swz = emit_op(c,
586		    OPCODE_SWZ,
587		    dst_mask(dst, WRITEMASK_XZ),
588		    inst->SaturateMode,
589		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
590		    src_undef(),
591		    src_undef());
592      /* Avoid letting negation flag of src0 affect our 1 constant. */
593      swz->SrcReg[0].Negate &= ~NEGATE_X;
594   }
595   if (dst.WriteMask & WRITEMASK_W) {
596      /* dst.w = mov src1.w
597       */
598      emit_op(c,
599	      OPCODE_MOV,
600	      dst_mask(dst, WRITEMASK_W),
601	      inst->SaturateMode,
602	      src1,
603	      src_undef(),
604	      src_undef());
605   }
606}
607
608
609static void precalc_lit( struct brw_wm_compile *c,
610			 const struct prog_instruction *inst )
611{
612   struct prog_src_register src0 = inst->SrcReg[0];
613   struct prog_dst_register dst = inst->DstReg;
614
615   if (dst.WriteMask & WRITEMASK_XW) {
616      struct prog_instruction *swz;
617
618      /* dst.xw = swz src0.1111
619       */
620      swz = emit_op(c,
621		    OPCODE_SWZ,
622		    dst_mask(dst, WRITEMASK_XW),
623		    0,
624		    src_swizzle1(src0, SWIZZLE_ONE),
625		    src_undef(),
626		    src_undef());
627      /* Avoid letting the negation flag of src0 affect our 1 constant. */
628      swz->SrcReg[0].Negate = NEGATE_NONE;
629   }
630
631   if (dst.WriteMask & WRITEMASK_YZ) {
632      emit_op(c,
633	      OPCODE_LIT,
634	      dst_mask(dst, WRITEMASK_YZ),
635	      inst->SaturateMode,
636	      src0,
637	      src_undef(),
638	      src_undef());
639   }
640}
641
642
643/**
644 * Some TEX instructions require extra code, cube map coordinate
645 * normalization, or coordinate scaling for RECT textures, etc.
646 * This function emits those extra instructions and the TEX
647 * instruction itself.
648 */
649static void precalc_tex( struct brw_wm_compile *c,
650			 const struct prog_instruction *inst )
651{
652   struct prog_src_register coord;
653   struct prog_dst_register tmpcoord = { 0 };
654   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
655
656   assert(unit < BRW_MAX_TEX_UNIT);
657
658   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
659       struct prog_instruction *out;
660       struct prog_dst_register tmp0 = get_temp(c);
661       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
662       struct prog_dst_register tmp1 = get_temp(c);
663       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
664       struct prog_src_register src0 = inst->SrcReg[0];
665
666       /* find longest component of coord vector and normalize it */
667       tmpcoord = get_temp(c);
668       coord = src_reg_from_dst(tmpcoord);
669
670       /* tmpcoord = src0 (i.e.: coord = src0) */
671       out = emit_op(c, OPCODE_MOV,
672                     tmpcoord,
673                     0,
674                     src0,
675                     src_undef(),
676                     src_undef());
677       out->SrcReg[0].Negate = NEGATE_NONE;
678       out->SrcReg[0].Abs = 1;
679
680       /* tmp0 = MAX(coord.X, coord.Y) */
681       emit_op(c, OPCODE_MAX,
682               tmp0,
683               0,
684               src_swizzle1(coord, X),
685               src_swizzle1(coord, Y),
686               src_undef());
687
688       /* tmp1 = MAX(tmp0, coord.Z) */
689       emit_op(c, OPCODE_MAX,
690               tmp1,
691               0,
692               tmp0src,
693               src_swizzle1(coord, Z),
694               src_undef());
695
696       /* tmp0 = 1 / tmp1 */
697       emit_op(c, OPCODE_RCP,
698               dst_mask(tmp0, WRITEMASK_X),
699               0,
700               tmp1src,
701               src_undef(),
702               src_undef());
703
704       /* tmpCoord = src0 * tmp0 */
705       emit_op(c, OPCODE_MUL,
706               tmpcoord,
707               0,
708               src0,
709               src_swizzle1(tmp0src, SWIZZLE_X),
710               src_undef());
711
712       release_temp(c, tmp0);
713       release_temp(c, tmp1);
714   }
715   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
716      struct prog_src_register scale =
717	 search_or_add_param5( c,
718			       STATE_INTERNAL,
719			       STATE_TEXRECT_SCALE,
720			       unit,
721			       0,0 );
722
723      tmpcoord = get_temp(c);
724
725      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
726       */
727      emit_op(c,
728	      OPCODE_MUL,
729	      tmpcoord,
730	      0,
731	      inst->SrcReg[0],
732	      src_swizzle(scale,
733			  SWIZZLE_X,
734			  SWIZZLE_Y,
735			  SWIZZLE_ONE,
736			  SWIZZLE_ONE),
737	      src_undef());
738
739      coord = src_reg_from_dst(tmpcoord);
740   }
741   else {
742      coord = inst->SrcReg[0];
743   }
744
745   /* Need to emit YUV texture conversions by hand.  Probably need to
746    * do this here - the alternative is in brw_wm_emit.c, but the
747    * conversion requires allocating a temporary variable which we
748    * don't have the facility to do that late in the compilation.
749    */
750   if (c->key.yuvtex_mask & (1 << unit)) {
751      /* convert ycbcr to RGBA */
752      GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
753
754      /*
755	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
756	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
757	 UYV     = TEX ...
758	 UYV.xyz = ADD UYV,     C0
759	 UYV.y   = MUL UYV.y,   C0.w
760 	 if (UV swaped)
761	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
762	 else
763	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
764	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
765      */
766      struct prog_dst_register dst = inst->DstReg;
767      struct prog_dst_register tmp = get_temp(c);
768      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
769      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
770      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
771
772      /* tmp     = TEX ...
773       */
774      emit_tex_op(c,
775                  OPCODE_TEX,
776                  tmp,
777                  inst->SaturateMode,
778                  unit,
779                  inst->TexSrcTarget,
780                  inst->TexShadow,
781                  coord,
782                  src_undef(),
783                  src_undef());
784
785      /* tmp.xyz =  ADD TMP, C0
786       */
787      emit_op(c,
788	      OPCODE_ADD,
789	      dst_mask(tmp, WRITEMASK_XYZ),
790	      0,
791	      tmpsrc,
792	      C0,
793	      src_undef());
794
795      /* YUV.y   = MUL YUV.y, C0.w
796       */
797
798      emit_op(c,
799	      OPCODE_MUL,
800	      dst_mask(tmp, WRITEMASK_Y),
801	      0,
802	      tmpsrc,
803	      src_swizzle1(C0, W),
804	      src_undef());
805
806      /*
807       * if (UV swaped)
808       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
809       * else
810       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
811       */
812
813      emit_op(c,
814	      OPCODE_MAD,
815	      dst_mask(dst, WRITEMASK_XYZ),
816	      0,
817	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
818	      C1,
819	      src_swizzle1(tmpsrc, Y));
820
821      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
822       */
823      emit_op(c,
824	      OPCODE_MAD,
825	      dst_mask(dst, WRITEMASK_Y),
826	      0,
827	      src_swizzle1(tmpsrc, Z),
828	      src_swizzle1(C1, W),
829	      src_swizzle1(src_reg_from_dst(dst), Y));
830
831      release_temp(c, tmp);
832   }
833   else {
834      /* ordinary RGBA tex instruction */
835      emit_tex_op(c,
836                  OPCODE_TEX,
837                  inst->DstReg,
838                  inst->SaturateMode,
839                  unit,
840                  inst->TexSrcTarget,
841                  inst->TexShadow,
842                  coord,
843                  src_undef(),
844                  src_undef());
845   }
846
847   /* For GL_EXT_texture_swizzle: */
848   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
849      /* swizzle the result of the TEX instruction */
850      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
851      emit_op(c, OPCODE_SWZ,
852              inst->DstReg,
853              SATURATE_OFF, /* saturate already done above */
854              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
855              src_undef(),
856              src_undef());
857   }
858
859   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
860       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
861      release_temp(c, tmpcoord);
862}
863
864
865/**
866 * Check if the given TXP instruction really needs the divide-by-W step.
867 */
868static GLboolean projtex( struct brw_wm_compile *c,
869			  const struct prog_instruction *inst )
870{
871   const struct prog_src_register src = inst->SrcReg[0];
872   GLboolean retVal;
873
874   assert(inst->Opcode == OPCODE_TXP);
875
876   /* Only try to detect the simplest cases.  Could detect (later)
877    * cases where we are trying to emit code like RCP {1.0}, MUL x,
878    * {1.0}, and so on.
879    *
880    * More complex cases than this typically only arise from
881    * user-provided fragment programs anyway:
882    */
883   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
884      retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
885   else if (src.File == PROGRAM_INPUT &&
886	    GET_SWZ(src.Swizzle, W) == W &&
887            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
888      retVal = GL_FALSE;
889   else
890      retVal = GL_TRUE;
891
892   return retVal;
893}
894
895
896/**
897 * Emit code for TXP.
898 */
899static void precalc_txp( struct brw_wm_compile *c,
900			       const struct prog_instruction *inst )
901{
902   struct prog_src_register src0 = inst->SrcReg[0];
903
904   if (projtex(c, inst)) {
905      struct prog_dst_register tmp = get_temp(c);
906      struct prog_instruction tmp_inst;
907
908      /* tmp0.w = RCP inst.arg[0][3]
909       */
910      emit_op(c,
911	      OPCODE_RCP,
912	      dst_mask(tmp, WRITEMASK_W),
913	      0,
914	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
915	      src_undef(),
916	      src_undef());
917
918      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
919       */
920      emit_op(c,
921	      OPCODE_MUL,
922	      dst_mask(tmp, WRITEMASK_XYZ),
923	      0,
924	      src0,
925	      src_swizzle1(src_reg_from_dst(tmp), W),
926	      src_undef());
927
928      /* dst = precalc(TEX tmp0)
929       */
930      tmp_inst = *inst;
931      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
932      precalc_tex(c, &tmp_inst);
933
934      release_temp(c, tmp);
935   }
936   else
937   {
938      /* dst = precalc(TEX src0)
939       */
940      precalc_tex(c, inst);
941   }
942}
943
944
945
946static void emit_render_target_writes( struct brw_wm_compile *c )
947{
948   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
949   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
950   struct prog_src_register outcolor;
951   GLuint i;
952
953   struct prog_instruction *inst = NULL;
954
955   /* The inst->Aux field is used for FB write target and the EOT marker */
956
957   for (i = 0; i < c->key.nr_color_regions; i++) {
958      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
959	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
960      } else {
961	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
962      }
963      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
964		     0, outcolor, payload_r0_depth, outdepth);
965      inst->Aux = INST_AUX_TARGET(i);
966   }
967
968   /* Mark the last FB write as final, or emit a dummy write if we had
969    * no render targets bound.
970    */
971   if (c->key.nr_color_regions != 0) {
972      inst->Aux |= INST_AUX_EOT;
973   } else {
974      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
975		     0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
976		     payload_r0_depth, outdepth);
977      inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
978   }
979}
980
981
982
983
984/***********************************************************************
985 * Emit INTERP instructions ahead of first use of each attrib.
986 */
987
988static void validate_src_regs( struct brw_wm_compile *c,
989			       const struct prog_instruction *inst )
990{
991   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
992   GLuint i;
993
994   for (i = 0; i < nr_args; i++) {
995      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
996	 GLuint idx = inst->SrcReg[i].Index;
997	 if (!(c->fp_interp_emitted & (1<<idx))) {
998	    emit_interp(c, idx);
999	 }
1000      }
1001   }
1002}
1003
1004static void print_insns( const struct prog_instruction *insn,
1005			 GLuint nr )
1006{
1007   GLuint i;
1008   for (i = 0; i < nr; i++, insn++) {
1009      printf("%3d: ", i);
1010      if (insn->Opcode < MAX_OPCODE)
1011	 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1012      else if (insn->Opcode < MAX_WM_OPCODE) {
1013	 GLuint idx = insn->Opcode - MAX_OPCODE;
1014
1015	 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1016				      3, PROG_PRINT_DEBUG, NULL);
1017      }
1018      else
1019	 printf("965 Opcode %d\n", insn->Opcode);
1020   }
1021}
1022
1023
1024/**
1025 * Initial pass for fragment program code generation.
1026 * This function is used by both the GLSL and non-GLSL paths.
1027 */
1028void brw_wm_pass_fp( struct brw_wm_compile *c )
1029{
1030   struct intel_context *intel = &c->func.brw->intel;
1031   struct brw_fragment_program *fp = c->fp;
1032   GLuint insn;
1033
1034   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1035      printf("pre-fp:\n");
1036      _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1037			       GL_TRUE);
1038      printf("\n");
1039   }
1040
1041   c->pixel_xy = src_undef();
1042   if (intel->gen >= 6) {
1043      /* The interpolation deltas come in as the perspective pixel
1044       * location barycentric params.
1045       */
1046      c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1047   } else {
1048      c->delta_xy = src_undef();
1049   }
1050   c->pixel_w = src_undef();
1051   c->nr_fp_insns = 0;
1052   c->fp->tex_units_used = 0x0;
1053
1054   /* Emit preamble instructions.  This is where special instructions such as
1055    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1056    * compute shader inputs from varying vars.
1057    */
1058   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1059      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1060      validate_src_regs(c, inst);
1061   }
1062
1063   /* Loop over all instructions doing assorted simplifications and
1064    * transformations.
1065    */
1066   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1067      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1068      struct prog_instruction *out;
1069
1070      /* Check for INPUT values, emit INTERP instructions where
1071       * necessary:
1072       */
1073
1074      switch (inst->Opcode) {
1075      case OPCODE_SWZ:
1076	 out = emit_insn(c, inst);
1077	 out->Opcode = OPCODE_MOV;
1078	 break;
1079
1080      case OPCODE_ABS:
1081	 out = emit_insn(c, inst);
1082	 out->Opcode = OPCODE_MOV;
1083	 out->SrcReg[0].Negate = NEGATE_NONE;
1084	 out->SrcReg[0].Abs = 1;
1085	 break;
1086
1087      case OPCODE_SUB:
1088	 out = emit_insn(c, inst);
1089	 out->Opcode = OPCODE_ADD;
1090	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1091	 break;
1092
1093      case OPCODE_SCS:
1094	 out = emit_insn(c, inst);
1095	 /* This should probably be done in the parser.
1096	  */
1097	 out->DstReg.WriteMask &= WRITEMASK_XY;
1098	 break;
1099
1100      case OPCODE_DST:
1101	 precalc_dst(c, inst);
1102	 break;
1103
1104      case OPCODE_LIT:
1105	 precalc_lit(c, inst);
1106	 break;
1107
1108      case OPCODE_RSQ:
1109	 out = emit_scalar_insn(c, inst);
1110	 out->SrcReg[0].Abs = GL_TRUE;
1111	 break;
1112
1113      case OPCODE_TEX:
1114	 precalc_tex(c, inst);
1115	 break;
1116
1117      case OPCODE_TXP:
1118	 precalc_txp(c, inst);
1119	 break;
1120
1121      case OPCODE_TXB:
1122	 out = emit_insn(c, inst);
1123	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1124         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1125	 break;
1126
1127      case OPCODE_XPD:
1128	 out = emit_insn(c, inst);
1129	 /* This should probably be done in the parser.
1130	  */
1131	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1132	 break;
1133
1134      case OPCODE_KIL:
1135	 out = emit_insn(c, inst);
1136	 /* This should probably be done in the parser.
1137	  */
1138	 out->DstReg.WriteMask = 0;
1139	 break;
1140      case OPCODE_END:
1141	 emit_render_target_writes(c);
1142	 break;
1143      case OPCODE_PRINT:
1144	 break;
1145      default:
1146	 if (brw_wm_is_scalar_result(inst->Opcode))
1147	    emit_scalar_insn(c, inst);
1148	 else
1149	    emit_insn(c, inst);
1150	 break;
1151      }
1152   }
1153
1154   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1155      printf("pass_fp:\n");
1156      print_insns( c->prog_instructions, c->nr_fp_insns );
1157      printf("\n");
1158   }
1159}
1160
1161