brw_wm_fp.c revision cfa927766ab610a9a76730d337d77008d876ebbd
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "brw_context.h"
37#include "brw_wm.h"
38#include "brw_util.h"
39
40#include "shader/prog_parameter.h"
41#include "shader/prog_print.h"
42#include "shader/prog_statevars.h"
43
44
/** An invalid texture target */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** An invalid texture unit */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Register index of the first temporary handed out by get_temp() */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel numbers, used in this file as swizzle selectors. */
#define X    0
#define Y    1
#define Z    2
#define W    3
57
58
/**
 * Printable names for the WM-specific opcodes.  print_insns() indexes
 * this table with (opcode - MAX_OPCODE).
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

/* Compiled out: name table for the extra register file. */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78/***********************************************************************
79 * Source regs
80 */
81
82static struct prog_src_register src_reg(GLuint file, GLuint idx)
83{
84   struct prog_src_register reg;
85   reg.File = file;
86   reg.Index = idx;
87   reg.Swizzle = SWIZZLE_NOOP;
88   reg.RelAddr = 0;
89   reg.Negate = NEGATE_NONE;
90   reg.Abs = 0;
91   return reg;
92}
93
94static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
95{
96   return src_reg(dst.File, dst.Index);
97}
98
99static struct prog_src_register src_undef( void )
100{
101   return src_reg(PROGRAM_UNDEFINED, 0);
102}
103
104static GLboolean src_is_undef(struct prog_src_register src)
105{
106   return src.File == PROGRAM_UNDEFINED;
107}
108
109static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
110{
111   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
112   return reg;
113}
114
115static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
116{
117   return src_swizzle(reg, x, x, x, x);
118}
119
120static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
121{
122   reg.Swizzle = swizzle;
123   return reg;
124}
125
126
127/***********************************************************************
128 * Dest regs
129 */
130
131static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
132{
133   struct prog_dst_register reg;
134   reg.File = file;
135   reg.Index = idx;
136   reg.WriteMask = WRITEMASK_XYZW;
137   reg.RelAddr = 0;
138   reg.CondMask = COND_TR;
139   reg.CondSwizzle = 0;
140   reg.CondSrc = 0;
141   reg.pad = 0;
142   return reg;
143}
144
145static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
146{
147   reg.WriteMask &= mask;
148   return reg;
149}
150
151static struct prog_dst_register dst_undef( void )
152{
153   return dst_reg(PROGRAM_UNDEFINED, 0);
154}
155
156
157
158static struct prog_dst_register get_temp( struct brw_wm_compile *c )
159{
160   int bit = _mesa_ffs( ~c->fp_temp );
161
162   if (!bit) {
163      _mesa_printf("%s: out of temporaries\n", __FILE__);
164      exit(1);
165   }
166
167   c->fp_temp |= 1<<(bit-1);
168   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
169}
170
171
172static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
173{
174   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
175}
176
177
178/***********************************************************************
179 * Instructions
180 */
181
182static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
183{
184   assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
185   return &c->prog_instructions[c->nr_fp_insns++];
186}
187
188static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
189					const struct prog_instruction *inst0)
190{
191   struct prog_instruction *inst = get_fp_inst(c);
192   *inst = *inst0;
193   return inst;
194}
195
196static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
197				       GLuint op,
198				       struct prog_dst_register dest,
199				       GLuint saturate,
200				       GLuint tex_src_unit,
201				       GLuint tex_src_target,
202				       GLuint tex_shadow,
203				       struct prog_src_register src0,
204				       struct prog_src_register src1,
205				       struct prog_src_register src2 )
206{
207   struct prog_instruction *inst = get_fp_inst(c);
208
209   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
210          tex_src_unit == TEX_UNIT_NONE);
211   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
212          tex_src_target == TEX_TARGET_NONE);
213
214   /* update mask of which texture units are referenced by this program */
215   if (tex_src_unit != TEX_UNIT_NONE)
216      c->fp->tex_units_used |= (1 << tex_src_unit);
217
218   memset(inst, 0, sizeof(*inst));
219
220   inst->Opcode = op;
221   inst->DstReg = dest;
222   inst->SaturateMode = saturate;
223   inst->TexSrcUnit = tex_src_unit;
224   inst->TexSrcTarget = tex_src_target;
225   inst->TexShadow = tex_shadow;
226   inst->SrcReg[0] = src0;
227   inst->SrcReg[1] = src1;
228   inst->SrcReg[2] = src2;
229   return inst;
230}
231
232
233static struct prog_instruction * emit_op(struct brw_wm_compile *c,
234				       GLuint op,
235				       struct prog_dst_register dest,
236				       GLuint saturate,
237				       struct prog_src_register src0,
238				       struct prog_src_register src1,
239				       struct prog_src_register src2 )
240{
241   return emit_tex_op(c, op, dest, saturate,
242                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
243                      src0, src1, src2);
244}
245
246
247/* Many Mesa opcodes produce the same value across all the result channels.
248 * We'd rather not have to support that splatting in the opcode implementations,
249 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
250 * anyway.  We can easily get both by emitting the opcode to one channel, and
251 * then MOVing it to the others, which brw_wm_pass*.c already understands.
252 */
253static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
254						 const struct prog_instruction *inst0)
255{
256   struct prog_instruction *inst;
257   unsigned int dst_chan;
258   unsigned int other_channel_mask;
259
260   if (inst0->DstReg.WriteMask == 0)
261      return NULL;
262
263   dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
264   inst = get_fp_inst(c);
265   *inst = *inst0;
266   inst->DstReg.WriteMask = 1 << dst_chan;
267
268   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
269   if (other_channel_mask != 0) {
270      inst = emit_op(c,
271		     OPCODE_MOV,
272		     dst_mask(inst0->DstReg, other_channel_mask),
273		     0,
274		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
275		     src_undef(),
276		     src_undef());
277   }
278   return inst;
279}
280
281
282/***********************************************************************
283 * Special instructions for interpolation and other tasks
284 */
285
286static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
287{
288   if (src_is_undef(c->pixel_xy)) {
289      struct prog_dst_register pixel_xy = get_temp(c);
290      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
291
292
293      /* Emit the out calculations, and hold onto the results.  Use
294       * two instructions as a temporary is required.
295       */
296      /* pixel_xy.xy = PIXELXY payload[0];
297       */
298      emit_op(c,
299	      WM_PIXELXY,
300	      dst_mask(pixel_xy, WRITEMASK_XY),
301	      0,
302	      payload_r0_depth,
303	      src_undef(),
304	      src_undef());
305
306      c->pixel_xy = src_reg_from_dst(pixel_xy);
307   }
308
309   return c->pixel_xy;
310}
311
312static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
313{
314   if (src_is_undef(c->delta_xy)) {
315      struct prog_dst_register delta_xy = get_temp(c);
316      struct prog_src_register pixel_xy = get_pixel_xy(c);
317      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
318
319      /* deltas.xy = DELTAXY pixel_xy, payload[0]
320       */
321      emit_op(c,
322	      WM_DELTAXY,
323	      dst_mask(delta_xy, WRITEMASK_XY),
324	      0,
325	      pixel_xy,
326	      payload_r0_depth,
327	      src_undef());
328
329      c->delta_xy = src_reg_from_dst(delta_xy);
330   }
331
332   return c->delta_xy;
333}
334
335static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
336{
337   if (src_is_undef(c->pixel_w)) {
338      struct prog_dst_register pixel_w = get_temp(c);
339      struct prog_src_register deltas = get_delta_xy(c);
340      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
341
342      /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
343       */
344      emit_op(c,
345	      WM_PIXELW,
346	      dst_mask(pixel_w, WRITEMASK_W),
347	      0,
348	      interp_wpos,
349	      deltas,
350	      src_undef());
351
352
353      c->pixel_w = src_reg_from_dst(pixel_w);
354   }
355
356   return c->pixel_w;
357}
358
359static void emit_interp( struct brw_wm_compile *c,
360			 GLuint idx )
361{
362   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
363   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
364   struct prog_src_register deltas = get_delta_xy(c);
365
366   /* Need to use PINTERP on attributes which have been
367    * multiplied by 1/W in the SF program, and LINTERP on those
368    * which have not:
369    */
370   switch (idx) {
371   case FRAG_ATTRIB_WPOS:
372      /* Have to treat wpos.xy specially:
373       */
374      emit_op(c,
375	      WM_WPOSXY,
376	      dst_mask(dst, WRITEMASK_XY),
377	      0,
378	      get_pixel_xy(c),
379	      src_undef(),
380	      src_undef());
381
382      dst = dst_mask(dst, WRITEMASK_ZW);
383
384      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
385       */
386      emit_op(c,
387	      WM_LINTERP,
388	      dst,
389	      0,
390	      interp,
391	      deltas,
392	      src_undef());
393      break;
394   case FRAG_ATTRIB_COL0:
395   case FRAG_ATTRIB_COL1:
396      if (c->key.flat_shade) {
397	 emit_op(c,
398		 WM_CINTERP,
399		 dst,
400		 0,
401		 interp,
402		 src_undef(),
403		 src_undef());
404      }
405      else {
406         if (c->key.linear_color) {
407            emit_op(c,
408                    WM_LINTERP,
409                    dst,
410                    0,
411                    interp,
412                    deltas,
413                    src_undef());
414         }
415         else {
416            /* perspective-corrected color interpolation */
417            emit_op(c,
418                    WM_PINTERP,
419                    dst,
420                    0,
421                    interp,
422                    deltas,
423                    get_pixel_w(c));
424         }
425      }
426      break;
427   case FRAG_ATTRIB_FOGC:
428      /* Interpolate the fog coordinate */
429      emit_op(c,
430	      WM_PINTERP,
431	      dst_mask(dst, WRITEMASK_X),
432	      0,
433	      interp,
434	      deltas,
435	      get_pixel_w(c));
436
437      emit_op(c,
438	      OPCODE_MOV,
439	      dst_mask(dst, WRITEMASK_YZW),
440	      0,
441	      src_swizzle(interp,
442			  SWIZZLE_ZERO,
443			  SWIZZLE_ZERO,
444			  SWIZZLE_ZERO,
445			  SWIZZLE_ONE),
446	      src_undef(),
447	      src_undef());
448      break;
449
450   case FRAG_ATTRIB_FACE:
451      emit_op(c,
452              WM_FRONTFACING,
453              dst_mask(dst, WRITEMASK_X),
454              0,
455              src_undef(),
456              src_undef(),
457              src_undef());
458      break;
459
460   case FRAG_ATTRIB_PNTC:
461      /* XXX review/test this case */
462      emit_op(c,
463	      WM_PINTERP,
464	      dst_mask(dst, WRITEMASK_XY),
465	      0,
466	      interp,
467	      deltas,
468	      get_pixel_w(c));
469
470      emit_op(c,
471	      OPCODE_MOV,
472	      dst_mask(dst, WRITEMASK_ZW),
473	      0,
474	      src_swizzle(interp,
475			  SWIZZLE_ZERO,
476			  SWIZZLE_ZERO,
477			  SWIZZLE_ZERO,
478			  SWIZZLE_ONE),
479	      src_undef(),
480	      src_undef());
481      break;
482
483   default:
484      emit_op(c,
485	      WM_PINTERP,
486	      dst,
487	      0,
488	      interp,
489	      deltas,
490	      get_pixel_w(c));
491      break;
492   }
493
494   c->fp_interp_emitted |= 1<<idx;
495}
496
497/***********************************************************************
498 * Hacks to extend the program parameter and constant lists.
499 */
500
501/* Add the fog parameters to the parameter list of the original
502 * program, rather than creating a new list.  Doesn't really do any
503 * harm and it's not as if the parameter handling isn't a big hack
504 * anyway.
505 */
506static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
507                                                     GLint s0,
508                                                     GLint s1,
509                                                     GLint s2,
510                                                     GLint s3,
511                                                     GLint s4)
512{
513   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
514   gl_state_index tokens[STATE_LENGTH];
515   GLuint idx;
516   tokens[0] = s0;
517   tokens[1] = s1;
518   tokens[2] = s2;
519   tokens[3] = s3;
520   tokens[4] = s4;
521
522   for (idx = 0; idx < paramList->NumParameters; idx++) {
523      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
524	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
525	 return src_reg(PROGRAM_STATE_VAR, idx);
526   }
527
528   idx = _mesa_add_state_reference( paramList, tokens );
529
530   return src_reg(PROGRAM_STATE_VAR, idx);
531}
532
533
534static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
535						     GLfloat s0,
536						     GLfloat s1,
537						     GLfloat s2,
538						     GLfloat s3)
539{
540   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
541   GLfloat values[4];
542   GLuint idx;
543   GLuint swizzle;
544
545   values[0] = s0;
546   values[1] = s1;
547   values[2] = s2;
548   values[3] = s3;
549
550   /* Have to search, otherwise multiple compilations will each grow
551    * the parameter list.
552    */
553   for (idx = 0; idx < paramList->NumParameters; idx++) {
554      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
555	  memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
556
557	 /* XXX: this mimics the mesa bug which puts all constants and
558	  * parameters into the "PROGRAM_STATE_VAR" category:
559	  */
560	 return src_reg(PROGRAM_STATE_VAR, idx);
561   }
562
563   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
564   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
565   return src_reg(PROGRAM_STATE_VAR, idx);
566}
567
568
569
570/***********************************************************************
571 * Expand various instructions here to simpler forms.
572 */
573static void precalc_dst( struct brw_wm_compile *c,
574			       const struct prog_instruction *inst )
575{
576   struct prog_src_register src0 = inst->SrcReg[0];
577   struct prog_src_register src1 = inst->SrcReg[1];
578   struct prog_dst_register dst = inst->DstReg;
579
580   if (dst.WriteMask & WRITEMASK_Y) {
581      /* dst.y = mul src0.y, src1.y
582       */
583      emit_op(c,
584	      OPCODE_MUL,
585	      dst_mask(dst, WRITEMASK_Y),
586	      inst->SaturateMode,
587	      src0,
588	      src1,
589	      src_undef());
590   }
591
592   if (dst.WriteMask & WRITEMASK_XZ) {
593      struct prog_instruction *swz;
594      GLuint z = GET_SWZ(src0.Swizzle, Z);
595
596      /* dst.xz = swz src0.1zzz
597       */
598      swz = emit_op(c,
599		    OPCODE_SWZ,
600		    dst_mask(dst, WRITEMASK_XZ),
601		    inst->SaturateMode,
602		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
603		    src_undef(),
604		    src_undef());
605      /* Avoid letting negation flag of src0 affect our 1 constant. */
606      swz->SrcReg[0].Negate &= ~NEGATE_X;
607   }
608   if (dst.WriteMask & WRITEMASK_W) {
609      /* dst.w = mov src1.w
610       */
611      emit_op(c,
612	      OPCODE_MOV,
613	      dst_mask(dst, WRITEMASK_W),
614	      inst->SaturateMode,
615	      src1,
616	      src_undef(),
617	      src_undef());
618   }
619}
620
621
622static void precalc_lit( struct brw_wm_compile *c,
623			 const struct prog_instruction *inst )
624{
625   struct prog_src_register src0 = inst->SrcReg[0];
626   struct prog_dst_register dst = inst->DstReg;
627
628   if (dst.WriteMask & WRITEMASK_XW) {
629      struct prog_instruction *swz;
630
631      /* dst.xw = swz src0.1111
632       */
633      swz = emit_op(c,
634		    OPCODE_SWZ,
635		    dst_mask(dst, WRITEMASK_XW),
636		    0,
637		    src_swizzle1(src0, SWIZZLE_ONE),
638		    src_undef(),
639		    src_undef());
640      /* Avoid letting the negation flag of src0 affect our 1 constant. */
641      swz->SrcReg[0].Negate = NEGATE_NONE;
642   }
643
644   if (dst.WriteMask & WRITEMASK_YZ) {
645      emit_op(c,
646	      OPCODE_LIT,
647	      dst_mask(dst, WRITEMASK_YZ),
648	      inst->SaturateMode,
649	      src0,
650	      src_undef(),
651	      src_undef());
652   }
653}
654
655
/**
 * Some TEX instructions require extra code, cube map coordinate
 * normalization, or coordinate scaling for RECT textures, etc.
 * This function emits those extra instructions and the TEX
 * instruction itself.
 *
 * The emitted sequence is, in order:
 *   1. coordinate fix-up (cube and RECT targets only),
 *   2. the TEX itself, followed by a YCbCr->RGB conversion when the
 *      unit is flagged in c->key.yuvtex_mask,
 *   3. a SWZ of the result when c->key.tex_swizzles[unit] is not
 *      SWIZZLE_NOOP.
 *
 * inst->TexSrcUnit is translated through SamplerUnits[] before use.
 */
static void precalc_tex( struct brw_wm_compile *c,
                         const struct prog_instruction *inst )
{
   struct prog_src_register coord;
   /* Only assigned (and later released) for cube and RECT targets. */
   struct prog_dst_register tmpcoord;
   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

   assert(unit < BRW_MAX_TEX_UNIT);

   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
       struct prog_instruction *out;
       struct prog_dst_register tmp0 = get_temp(c);
       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
       struct prog_dst_register tmp1 = get_temp(c);
       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
       struct prog_src_register src0 = inst->SrcReg[0];

       /* find longest component of coord vector and normalize it */
       tmpcoord = get_temp(c);
       coord = src_reg_from_dst(tmpcoord);

       /* tmpcoord = |src0|: the MOV's source is patched below to drop
        * any negation and take the absolute value.
        */
       out = emit_op(c, OPCODE_MOV,
                     tmpcoord,
                     0,
                     src0,
                     src_undef(),
                     src_undef());
       out->SrcReg[0].Negate = NEGATE_NONE;
       out->SrcReg[0].Abs = 1;

       /* tmp0 = MAX(coord.X, coord.Y) */
       emit_op(c, OPCODE_MAX,
               tmp0,
               0,
               src_swizzle1(coord, X),
               src_swizzle1(coord, Y),
               src_undef());

       /* tmp1 = MAX(tmp0, coord.Z) */
       emit_op(c, OPCODE_MAX,
               tmp1,
               0,
               tmp0src,
               src_swizzle1(coord, Z),
               src_undef());

       /* tmp0.x = 1 / tmp1 */
       emit_op(c, OPCODE_RCP,
               dst_mask(tmp0, WRITEMASK_X),
               0,
               tmp1src,
               src_undef(),
               src_undef());

       /* tmpcoord = src0 * tmp0.x (uses the original, signed src0) */
       emit_op(c, OPCODE_MUL,
               tmpcoord,
               0,
               src0,
               src_swizzle1(tmp0src, SWIZZLE_X),
               src_undef());

       release_temp(c, tmp0);
       release_temp(c, tmp1);
   }
   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
      /* State variable holding the per-unit RECT scale factors. */
      struct prog_src_register scale =
         search_or_add_param5( c,
                               STATE_INTERNAL,
                               STATE_TEXRECT_SCALE,
                               unit,
                               0,0 );

      tmpcoord = get_temp(c);

      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
       * (.zw are multiplied by 1, i.e. passed through)
       */
      emit_op(c,
              OPCODE_MUL,
              tmpcoord,
              0,
              inst->SrcReg[0],
              src_swizzle(scale,
                          SWIZZLE_X,
                          SWIZZLE_Y,
                          SWIZZLE_ONE,
                          SWIZZLE_ONE),
              src_undef());

      coord = src_reg_from_dst(tmpcoord);
   }
   else {
      /* No fix-up needed: sample with the original coordinate. */
      coord = inst->SrcReg[0];
   }

   /* Need to emit YUV texture conversions by hand.  Probably need to
    * do this here - the alternative is in brw_wm_emit.c, but the
    * conversion requires allocating a temporary variable which we
    * don't have the facility to do that late in the compilation.
    */
   if (c->key.yuvtex_mask & (1 << unit)) {
      /* convert ycbcr to RGBA */
      GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);

      /*
         CONST C0 = { -.5, -.0625,  -.5, 1.164 }
         CONST C1 = { 1.596, -0.813, 2.018, -.391 }
         UYV     = TEX ...
         UYV.xyz = ADD UYV,     C0
         UYV.y   = MUL UYV.y,   C0.w
         if (UV swapped)
            RGB.xyz = MAD UYV.zzx, C1,   UYV.y
         else
            RGB.xyz = MAD UYV.xxz, C1,   UYV.y
         RGB.y   = MAD UYV.z,   C1.w, RGB.y
      */
      /* NOTE(review): the conversion below only writes dst.xyz;
       * dst.w is not written on this path -- confirm that is intended.
       */
      struct prog_dst_register dst = inst->DstReg;
      struct prog_dst_register tmp = get_temp(c);
      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );

      /* tmp     = TEX ...
       */
      emit_tex_op(c,
                  OPCODE_TEX,
                  tmp,
                  inst->SaturateMode,
                  unit,
                  inst->TexSrcTarget,
                  inst->TexShadow,
                  coord,
                  src_undef(),
                  src_undef());

      /* tmp.xyz =  ADD TMP, C0
       */
      emit_op(c,
              OPCODE_ADD,
              dst_mask(tmp, WRITEMASK_XYZ),
              0,
              tmpsrc,
              C0,
              src_undef());

      /* YUV.y   = MUL YUV.y, C0.w
       */

      emit_op(c,
              OPCODE_MUL,
              dst_mask(tmp, WRITEMASK_Y),
              0,
              tmpsrc,
              src_swizzle1(C0, W),
              src_undef());

      /*
       * if (UV swapped)
       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
       * else
       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
       */

      emit_op(c,
              OPCODE_MAD,
              dst_mask(dst, WRITEMASK_XYZ),
              0,
              swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
              C1,
              src_swizzle1(tmpsrc, Y));

      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
       */
      emit_op(c,
              OPCODE_MAD,
              dst_mask(dst, WRITEMASK_Y),
              0,
              src_swizzle1(tmpsrc, Z),
              src_swizzle1(C1, W),
              src_swizzle1(src_reg_from_dst(dst), Y));

      release_temp(c, tmp);
   }
   else {
      /* ordinary RGBA tex instruction */
      emit_tex_op(c,
                  OPCODE_TEX,
                  inst->DstReg,
                  inst->SaturateMode,
                  unit,
                  inst->TexSrcTarget,
                  inst->TexShadow,
                  coord,
                  src_undef(),
                  src_undef());
   }

   /* For GL_EXT_texture_swizzle: */
   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
      /* swizzle the result of the TEX instruction */
      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
      emit_op(c, OPCODE_SWZ,
              inst->DstReg,
              SATURATE_OFF, /* saturate already done above */
              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
              src_undef(),
              src_undef());
   }

   /* The coordinate temporary exists only for these two targets; it is
    * held until here, after the TEX that reads it has been emitted.
    */
   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
      release_temp(c, tmpcoord);
}
876
877
878/**
879 * Check if the given TXP instruction really needs the divide-by-W step.
880 */
881static GLboolean projtex( struct brw_wm_compile *c,
882			  const struct prog_instruction *inst )
883{
884   const struct prog_src_register src = inst->SrcReg[0];
885   GLboolean retVal;
886
887   assert(inst->Opcode == OPCODE_TXP);
888
889   /* Only try to detect the simplest cases.  Could detect (later)
890    * cases where we are trying to emit code like RCP {1.0}, MUL x,
891    * {1.0}, and so on.
892    *
893    * More complex cases than this typically only arise from
894    * user-provided fragment programs anyway:
895    */
896   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
897      retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
898   else if (src.File == PROGRAM_INPUT &&
899	    GET_SWZ(src.Swizzle, W) == W &&
900            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
901      retVal = GL_FALSE;
902   else
903      retVal = GL_TRUE;
904
905   return retVal;
906}
907
908
909/**
910 * Emit code for TXP.
911 */
912static void precalc_txp( struct brw_wm_compile *c,
913			       const struct prog_instruction *inst )
914{
915   struct prog_src_register src0 = inst->SrcReg[0];
916
917   if (projtex(c, inst)) {
918      struct prog_dst_register tmp = get_temp(c);
919      struct prog_instruction tmp_inst;
920
921      /* tmp0.w = RCP inst.arg[0][3]
922       */
923      emit_op(c,
924	      OPCODE_RCP,
925	      dst_mask(tmp, WRITEMASK_W),
926	      0,
927	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
928	      src_undef(),
929	      src_undef());
930
931      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
932       */
933      emit_op(c,
934	      OPCODE_MUL,
935	      dst_mask(tmp, WRITEMASK_XYZ),
936	      0,
937	      src0,
938	      src_swizzle1(src_reg_from_dst(tmp), W),
939	      src_undef());
940
941      /* dst = precalc(TEX tmp0)
942       */
943      tmp_inst = *inst;
944      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
945      precalc_tex(c, &tmp_inst);
946
947      release_temp(c, tmp);
948   }
949   else
950   {
951      /* dst = precalc(TEX src0)
952       */
953      precalc_tex(c, inst);
954   }
955}
956
957
958
959static void emit_render_target_writes( struct brw_wm_compile *c )
960{
961   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
962   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
963   struct prog_src_register outcolor;
964   GLuint i;
965
966   struct prog_instruction *inst, *last_inst;
967
968   /* The inst->Aux field is used for FB write target and the EOT marker */
969
970   if (c->key.nr_color_regions > 1) {
971      for (i = 0 ; i < c->key.nr_color_regions; i++) {
972         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
973         last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
974                                    0, outcolor, payload_r0_depth, outdepth);
975         inst->Aux = INST_AUX_TARGET(i);
976         if (c->fp_fragcolor_emitted) {
977            outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
978            last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
979                                       0, outcolor, payload_r0_depth, outdepth);
980            inst->Aux = INST_AUX_TARGET(i);
981         }
982      }
983      last_inst->Aux |= INST_AUX_EOT;
984   }
985   else {
986      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
987      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
988         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
989      else
990         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
991
992      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
993                     0, outcolor, payload_r0_depth, outdepth);
994      inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
995   }
996}
997
998
999
1000
1001/***********************************************************************
1002 * Emit INTERP instructions ahead of first use of each attrib.
1003 */
1004
1005static void validate_src_regs( struct brw_wm_compile *c,
1006			       const struct prog_instruction *inst )
1007{
1008   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1009   GLuint i;
1010
1011   for (i = 0; i < nr_args; i++) {
1012      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1013	 GLuint idx = inst->SrcReg[i].Index;
1014	 if (!(c->fp_interp_emitted & (1<<idx))) {
1015	    emit_interp(c, idx);
1016	 }
1017      }
1018   }
1019}
1020
1021static void validate_dst_regs( struct brw_wm_compile *c,
1022			       const struct prog_instruction *inst )
1023{
1024   if (inst->DstReg.File == PROGRAM_OUTPUT) {
1025      GLuint idx = inst->DstReg.Index;
1026      if (idx == FRAG_RESULT_COLOR)
1027         c->fp_fragcolor_emitted = 1;
1028   }
1029}
1030
1031static void print_insns( const struct prog_instruction *insn,
1032			 GLuint nr )
1033{
1034   GLuint i;
1035   for (i = 0; i < nr; i++, insn++) {
1036      _mesa_printf("%3d: ", i);
1037      if (insn->Opcode < MAX_OPCODE)
1038	 _mesa_print_instruction(insn);
1039      else if (insn->Opcode < MAX_WM_OPCODE) {
1040	 GLuint idx = insn->Opcode - MAX_OPCODE;
1041
1042	 _mesa_print_alu_instruction(insn,
1043				     wm_opcode_strings[idx],
1044				     3);
1045      }
1046      else
1047	 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1048   }
1049}
1050
1051
/**
 * Initial pass for fragment program code generation.
 * This function is used by both the GLSL and non-GLSL paths.
 *
 * Translates the instructions of c->fp->program.Base into
 * c->prog_instructions (count in c->nr_fp_insns):
 *   - first, interpolation code is emitted for every input attribute
 *     that any instruction reads;
 *   - then each instruction is copied, simplified, or expanded into a
 *     sequence of simpler instructions.
 *
 * With DEBUG_WM set in INTEL_DEBUG, the program is printed before and
 * after the pass.
 */
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
   struct brw_fragment_program *fp = c->fp;
   GLuint insn;

   if (INTEL_DEBUG & DEBUG_WM) {
      _mesa_printf("pre-fp:\n");
      _mesa_print_program(&fp->program.Base);
      _mesa_printf("\n");
   }

   /* Reset the per-compile state this pass maintains. */
   c->pixel_xy = src_undef();
   c->delta_xy = src_undef();
   c->pixel_w = src_undef();
   c->nr_fp_insns = 0;
   c->fp->tex_units_used = 0x0;

   /* Emit preamble instructions.  This is where special instructions such as
    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
    * compute shader inputs from varying vars.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      validate_src_regs(c, inst);
      validate_dst_regs(c, inst);
   }

   /* Loop over all instructions doing assorted simplifications and
    * transformations.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      struct prog_instruction *out;

      /* Input interpolation was already emitted by the preamble loop
       * above; here each instruction is only rewritten or expanded.
       */

      switch (inst->Opcode) {
      case OPCODE_SWZ:
         /* SWZ becomes a MOV with the same operands. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_MOV;
         break;

      case OPCODE_ABS:
         /* ABS becomes a MOV whose source has the Abs flag set. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_MOV;
         out->SrcReg[0].Negate = NEGATE_NONE;
         out->SrcReg[0].Abs = 1;
         break;

      case OPCODE_SUB:
         /* SUB becomes an ADD with the second source's negate bits flipped. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_ADD;
         out->SrcReg[1].Negate ^= NEGATE_XYZW;
         break;

      case OPCODE_SCS:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask &= WRITEMASK_XY;
         break;

      case OPCODE_DST:
         precalc_dst(c, inst);
         break;

      case OPCODE_LIT:
         precalc_lit(c, inst);
         break;

      case OPCODE_TEX:
         precalc_tex(c, inst);
         break;

      case OPCODE_TXP:
         precalc_txp(c, inst);
         break;

      case OPCODE_TXB:
         /* Copied as is, except that the unit is translated through
          * SamplerUnits[].
          */
         out = emit_insn(c, inst);
         out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
         break;

      case OPCODE_XPD:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask &= WRITEMASK_XYZ;
         break;

      case OPCODE_KIL:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask = 0;
         break;
      case OPCODE_END:
         /* END is replaced by the framebuffer write(s). */
         emit_render_target_writes(c);
         break;
      case OPCODE_PRINT:
         /* Dropped: nothing is emitted for PRINT. */
         break;
      default:
         /* Scalar-result opcodes are emitted for one channel and then
          * replicated; everything else is copied unchanged.
          */
         if (brw_wm_is_scalar_result(inst->Opcode))
            emit_scalar_insn(c, inst);
         else
            emit_insn(c, inst);
         break;
      }
   }

   if (INTEL_DEBUG & DEBUG_WM) {
      _mesa_printf("pass_fp:\n");
      print_insns( c->prog_instructions, c->nr_fp_insns );
      _mesa_printf("\n");
   }
}
1175
1176