s_atifragshader.c revision b30898f4ab533085d97a33638ad0a1cf9ddb1d67
1/*
2 * Copyright (C) 2004  David Airlie   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22#include "main/glheader.h"
23#include "main/colormac.h"
24#include "main/context.h"
25#include "main/macros.h"
26#include "shader/atifragshader.h"
27#include "swrast/s_atifragshader.h"
28
29
30/**
31 * State for executing ATI fragment shader.
32 */
33struct atifs_machine
34{
35   GLfloat Registers[6][4];         /** six temporary registers */
36   GLfloat PrevPassRegisters[6][4];
37   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
38};
39
40
41
42/**
43 * Fetch a texel.
44 */
45static void
46fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
47	    GLuint unit, GLfloat color[4])
48{
49   SWcontext *swrast = SWRAST_CONTEXT(ctx);
50
51   /* XXX use a float-valued TextureSample routine here!!! */
52   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
53                               1, (const GLfloat(*)[4]) texcoord,
54                               &lambda, (GLfloat (*)[4]) color);
55}
56
57static void
58apply_swizzle(GLfloat values[4], GLuint swizzle)
59{
60   GLfloat s, t, r, q;
61
62   s = values[0];
63   t = values[1];
64   r = values[2];
65   q = values[3];
66
67   switch (swizzle) {
68   case GL_SWIZZLE_STR_ATI:
69      values[0] = s;
70      values[1] = t;
71      values[2] = r;
72      break;
73   case GL_SWIZZLE_STQ_ATI:
74      values[0] = s;
75      values[1] = t;
76      values[2] = q;
77      break;
78   case GL_SWIZZLE_STR_DR_ATI:
79      values[0] = s / r;
80      values[1] = t / r;
81      values[2] = 1 / r;
82      break;
83   case GL_SWIZZLE_STQ_DQ_ATI:
84/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
85      if (q == 0.0F)
86         q = 0.000000001F;
87      values[0] = s / q;
88      values[1] = t / q;
89      values[2] = 1.0F / q;
90      break;
91   }
92   values[3] = 0.0;
93}
94
95static void
96apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
97{
98   GLint i;
99   GLint start, end;
100   if (!rep)
101      return;
102
103   start = optype ? 3 : 0;
104   end = 4;
105
106   for (i = start; i < end; i++) {
107      switch (rep) {
108      case GL_RED:
109	 val[i] = val[0];
110	 break;
111      case GL_GREEN:
112	 val[i] = val[1];
113	 break;
114      case GL_BLUE:
115	 val[i] = val[2];
116	 break;
117      case GL_ALPHA:
118	 val[i] = val[3];
119	 break;
120      }
121   }
122}
123
124static void
125apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
126{
127   GLint i;
128   GLint start, end;
129
130   if (!mod)
131      return;
132
133   start = optype ? 3 : 0;
134   end = 4;
135
136   for (i = start; i < end; i++) {
137      if (mod & GL_COMP_BIT_ATI)
138	 val[i] = 1 - val[i];
139
140      if (mod & GL_BIAS_BIT_ATI)
141	 val[i] = val[i] - 0.5F;
142
143      if (mod & GL_2X_BIT_ATI)
144	 val[i] = 2 * val[i];
145
146      if (mod & GL_NEGATE_BIT_ATI)
147	 val[i] = -val[i];
148   }
149}
150
151static void
152apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
153{
154   GLint i;
155   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
156   GLint start, end;
157
158   mod &= ~GL_SATURATE_BIT_ATI;
159
160   start = optype ? 3 : 0;
161   end = optype ? 4 : 3;
162
163   for (i = start; i < end; i++) {
164      switch (mod) {
165      case GL_2X_BIT_ATI:
166	 val[i] = 2 * val[i];
167	 break;
168      case GL_4X_BIT_ATI:
169	 val[i] = 4 * val[i];
170	 break;
171      case GL_8X_BIT_ATI:
172	 val[i] = 8 * val[i];
173	 break;
174      case GL_HALF_BIT_ATI:
175	 val[i] = val[i] * 0.5F;
176	 break;
177      case GL_QUARTER_BIT_ATI:
178	 val[i] = val[i] * 0.25F;
179	 break;
180      case GL_EIGHTH_BIT_ATI:
181	 val[i] = val[i] * 0.125F;
182	 break;
183      }
184
185      if (has_sat) {
186	 if (val[i] < 0.0F)
187	    val[i] = 0.0F;
188	 else if (val[i] > 1.0F)
189	    val[i] = 1.0F;
190      }
191      else {
192	 if (val[i] < -8.0F)
193	    val[i] = -8.0F;
194	 else if (val[i] > 8.0F)
195	    val[i] = 8.0F;
196      }
197   }
198}
199
200
201static void
202write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
203	       GLfloat * dst)
204{
205   GLint i;
206   apply_dst_mod(optype, mod, src);
207
208   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
209      if (mask) {
210	 if (mask & GL_RED_BIT_ATI)
211	    dst[0] = src[0];
212
213	 if (mask & GL_GREEN_BIT_ATI)
214	    dst[1] = src[1];
215
216	 if (mask & GL_BLUE_BIT_ATI)
217	    dst[2] = src[2];
218      }
219      else {
220	 for (i = 0; i < 3; i++)
221	    dst[i] = src[i];
222      }
223   }
224   else
225      dst[3] = src[3];
226}
227
228static void
229finish_pass(struct atifs_machine *machine)
230{
231   GLint i;
232
233   for (i = 0; i < 6; i++) {
234      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
235   }
236}
237
238struct ati_fs_opcode_st ati_fs_opcodes[] = {
239   {GL_ADD_ATI, 2},
240   {GL_SUB_ATI, 2},
241   {GL_MUL_ATI, 2},
242   {GL_MAD_ATI, 3},
243   {GL_LERP_ATI, 3},
244   {GL_MOV_ATI, 1},
245   {GL_CND_ATI, 3},
246   {GL_CND0_ATI, 3},
247   {GL_DOT2_ADD_ATI, 3},
248   {GL_DOT3_ATI, 2},
249   {GL_DOT4_ATI, 2}
250};
251
252
253
254static void
255handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
256	       const SWspan *span, GLuint column, GLuint idx)
257{
258   GLuint swizzle = texinst->swizzle;
259   GLuint pass_tex = texinst->src;
260
261   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
262      pass_tex -= GL_TEXTURE0_ARB;
263      COPY_4V(machine->Registers[idx],
264	      span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
265   }
266   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
267      pass_tex -= GL_REG_0_ATI;
268      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
269   }
270   apply_swizzle(machine->Registers[idx], swizzle);
271
272}
273
274static void
275handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
276		 struct atifs_setupinst *texinst, const SWspan *span,
277		 GLuint column, GLuint idx)
278{
279/* sample from unit idx using texinst->src as coords */
280   GLuint swizzle = texinst->swizzle;
281   GLuint coord_source = texinst->src;
282   GLfloat tex_coords[4] = { 0 };
283
284   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
285      coord_source -= GL_TEXTURE0_ARB;
286      COPY_4V(tex_coords,
287              span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
288   }
289   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
290      coord_source -= GL_REG_0_ATI;
291      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
292   }
293   apply_swizzle(tex_coords, swizzle);
294   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
295}
296
/**
 * Copy the four-component vector "vec" into source-operand slot
 * src[type][arg].  Relies on an array named "src" being in scope at
 * the point of use.
 */
#define SETUP_SRC_REG(type, arg, vec)		\
do {						\
   COPY_4V(src[type][arg], vec);		\
} while (0)
301
302
303
304/**
305 * Execute the given fragment shader.
306 * NOTE: we do everything in single-precision floating point
307 * \param ctx - rendering context
308 * \param shader - the shader to execute
309 * \param machine - virtual machine state
310 * \param span - the SWspan we're operating on
311 * \param column - which pixel [i] we're operating on in the span
312 */
313static void
314execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
315	       struct atifs_machine *machine, const SWspan *span,
316               GLuint column)
317{
318   GLuint pc;
319   struct atifs_instruction *inst;
320   struct atifs_setupinst *texinst;
321   GLint optype;
322   GLuint i;
323   GLint j, pass;
324   GLint dstreg;
325   GLfloat src[2][3][4];
326   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
327   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
328   GLfloat dst[2][4], *dstp;
329
330   for (pass = 0; pass < shader->NumPasses; pass++) {
331      if (pass > 0)
332	 finish_pass(machine);
333      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
334	 texinst = &shader->SetupInst[pass][j];
335	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
336	    handle_pass_op(machine, texinst, span, column, j);
337	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
338	    handle_sample_op(ctx, machine, texinst, span, column, j);
339      }
340
341      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
342	 inst = &shader->Instructions[pass][pc];
343
344	 /* setup the source registers for color and alpha ops */
345	 for (optype = 0; optype < 2; optype++) {
346 	    for (i = 0; i < inst->ArgCount[optype]; i++) {
347	       GLint index = inst->SrcReg[optype][i].Index;
348
349	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
350		  SETUP_SRC_REG(optype, i,
351				machine->Registers[index - GL_REG_0_ATI]);
352	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
353		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
354		     SETUP_SRC_REG(optype, i,
355				shader->Constants[index - GL_CON_0_ATI]);
356		  } else {
357		     SETUP_SRC_REG(optype, i,
358				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
359		  }
360	       }
361	       else if (index == GL_ONE)
362		  SETUP_SRC_REG(optype, i, ones);
363	       else if (index == GL_ZERO)
364		  SETUP_SRC_REG(optype, i, zeros);
365	       else if (index == GL_PRIMARY_COLOR_EXT)
366		  SETUP_SRC_REG(optype, i,
367				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
368	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
369		  SETUP_SRC_REG(optype, i,
370				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
371
372	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
373			     src[optype][i]);
374	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
375			     src[optype][i]);
376	    }
377	 }
378
379	 /* Execute the operations - color then alpha */
380	 for (optype = 0; optype < 2; optype++) {
381	    if (inst->Opcode[optype]) {
382	       switch (inst->Opcode[optype]) {
383	       case GL_ADD_ATI:
384		  if (!optype)
385		     for (i = 0; i < 3; i++) {
386			dst[optype][i] =
387			   src[optype][0][i] + src[optype][1][i];
388		     }
389		  else
390		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
391		  break;
392	       case GL_SUB_ATI:
393		  if (!optype)
394		     for (i = 0; i < 3; i++) {
395			dst[optype][i] =
396			   src[optype][0][i] - src[optype][1][i];
397		     }
398		  else
399		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
400		  break;
401	       case GL_MUL_ATI:
402		  if (!optype)
403		     for (i = 0; i < 3; i++) {
404			dst[optype][i] =
405			   src[optype][0][i] * src[optype][1][i];
406		     }
407		  else
408		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
409		  break;
410	       case GL_MAD_ATI:
411		  if (!optype)
412		     for (i = 0; i < 3; i++) {
413			dst[optype][i] =
414			   src[optype][0][i] * src[optype][1][i] +
415			   src[optype][2][i];
416		     }
417		  else
418		     dst[optype][3] =
419			src[optype][0][3] * src[optype][1][3] +
420			src[optype][2][3];
421		  break;
422	       case GL_LERP_ATI:
423		  if (!optype)
424		     for (i = 0; i < 3; i++) {
425			dst[optype][i] =
426			   src[optype][0][i] * src[optype][1][i] + (1 -
427								    src
428								    [optype]
429								    [0][i]) *
430			   src[optype][2][i];
431		     }
432		  else
433		     dst[optype][3] =
434			src[optype][0][3] * src[optype][1][3] + (1 -
435								 src[optype]
436								 [0][3]) *
437			src[optype][2][3];
438		  break;
439
440	       case GL_MOV_ATI:
441		  if (!optype)
442		     for (i = 0; i < 3; i++) {
443			dst[optype][i] = src[optype][0][i];
444		     }
445		  else
446		     dst[optype][3] = src[optype][0][3];
447		  break;
448	       case GL_CND_ATI:
449		  if (!optype) {
450		     for (i = 0; i < 3; i++) {
451			dst[optype][i] =
452			   (src[optype][2][i] >
453			    0.5) ? src[optype][0][i] : src[optype][1][i];
454		     }
455		  }
456		  else {
457		     dst[optype][3] =
458			(src[optype][2][3] >
459			 0.5) ? src[optype][0][3] : src[optype][1][3];
460		  }
461		  break;
462
463	       case GL_CND0_ATI:
464		  if (!optype)
465		     for (i = 0; i < 3; i++) {
466			dst[optype][i] =
467			   (src[optype][2][i] >=
468			    0) ? src[optype][0][i] : src[optype][1][i];
469		     }
470		  else {
471		     dst[optype][3] =
472			(src[optype][2][3] >=
473			 0) ? src[optype][0][3] : src[optype][1][3];
474		  }
475		  break;
476	       case GL_DOT2_ADD_ATI:
477		  {
478		     GLfloat result;
479
480		     /* DOT 2 always uses the source from the color op */
481		     /* could save recalculation of dot products for alpha inst */
482		     result = src[0][0][0] * src[0][1][0] +
483			src[0][0][1] * src[0][1][1] + src[0][2][2];
484		     if (!optype) {
485			for (i = 0; i < 3; i++) {
486			   dst[optype][i] = result;
487			}
488		     }
489		     else
490			dst[optype][3] = result;
491		  }
492		  break;
493	       case GL_DOT3_ATI:
494		  {
495		     GLfloat result;
496
497		     /* DOT 3 always uses the source from the color op */
498		     result = src[0][0][0] * src[0][1][0] +
499			src[0][0][1] * src[0][1][1] +
500			src[0][0][2] * src[0][1][2];
501
502		     if (!optype) {
503			for (i = 0; i < 3; i++) {
504			   dst[optype][i] = result;
505			}
506		     }
507		     else
508			dst[optype][3] = result;
509		  }
510		  break;
511	       case GL_DOT4_ATI:
512		  {
513		     GLfloat result;
514
515		     /* DOT 4 always uses the source from the color op */
516		     result = src[0][0][0] * src[0][1][0] +
517			src[0][0][1] * src[0][1][1] +
518			src[0][0][2] * src[0][1][2] +
519			src[0][0][3] * src[0][1][3];
520		     if (!optype) {
521			for (i = 0; i < 3; i++) {
522			   dst[optype][i] = result;
523			}
524		     }
525		     else
526			dst[optype][3] = result;
527		  }
528		  break;
529
530	       }
531	    }
532	 }
533
534	 /* write out the destination registers */
535	 for (optype = 0; optype < 2; optype++) {
536	    if (inst->Opcode[optype]) {
537	       dstreg = inst->DstReg[optype].Index;
538	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
539
540	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
541		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
542	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
543			      inst->DstReg[optype].dstMask, dst[optype],
544			      dstp);
545	       else
546		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
547	    }
548	 }
549      }
550   }
551}
552
553
554/**
555 * Init fragment shader virtual machine state.
556 */
557static void
558init_machine(GLcontext * ctx, struct atifs_machine *machine,
559	     const struct ati_fragment_shader *shader,
560	     const SWspan *span, GLuint col)
561{
562   GLfloat (*inputs)[4] = machine->Inputs;
563   GLint i, j;
564
565   for (i = 0; i < 6; i++) {
566      for (j = 0; j < 4; j++)
567	 machine->Registers[i][j] = 0.0;
568   }
569
570   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
571   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
572}
573
574
575
576/**
577 * Execute the current ATI shader program, operating on the given span.
578 */
579void
580_swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
581{
582   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
583   struct atifs_machine machine;
584   GLuint i;
585
586   /* incoming colors should be floats */
587   ASSERT(span->array->ChanType == GL_FLOAT);
588
589   for (i = 0; i < span->end; i++) {
590      if (span->array->mask[i]) {
591	 init_machine(ctx, &machine, shader, span, i);
592
593	 execute_shader(ctx, shader, &machine, span, i);
594
595         /* store result color */
596	 {
597	    const GLfloat *colOut = machine.Registers[0];
598            /*fprintf(stderr,"outputs %f %f %f %f\n",
599              colOut[0], colOut[1], colOut[2], colOut[3]); */
600            COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
601	 }
602      }
603   }
604}
605