s_atifragshader.c revision 55187ea63e980b32c7a701855571332f4357d634
/*
 *
 * Copyright (C) 2004  David Airlie   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
227f752fed993e5e9423abac200dd59141edbada56Dave Airlie
237f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "glheader.h"
247f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "colormac.h"
257f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "context.h"
267f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "atifragshader.h"
277f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "macros.h"
287f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "program.h"
297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
306c58ddae17f63cd14b10d9022baee2ca6346fad2Brian Paul#include "s_atifragshader.h"
317f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "s_nvfragprog.h"
327f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "s_span.h"
3355187ea63e980b32c7a701855571332f4357d634Brian Paul
347f752fed993e5e9423abac200dd59141edbada56Dave Airlie
357f752fed993e5e9423abac200dd59141edbada56Dave Airlie/**
367f752fed993e5e9423abac200dd59141edbada56Dave Airlie * Fetch a texel.
377f752fed993e5e9423abac200dd59141edbada56Dave Airlie */
387f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
397f752fed993e5e9423abac200dd59141edbada56Dave Airliefetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
407f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    GLuint unit, GLfloat color[4])
417f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
427f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLchan rgba[4];
437f752fed993e5e9423abac200dd59141edbada56Dave Airlie   SWcontext *swrast = SWRAST_CONTEXT(ctx);
447f752fed993e5e9423abac200dd59141edbada56Dave Airlie
457f752fed993e5e9423abac200dd59141edbada56Dave Airlie   /* XXX use a float-valued TextureSample routine here!!! */
467f752fed993e5e9423abac200dd59141edbada56Dave Airlie   swrast->TextureSample[unit] (ctx, unit, ctx->Texture.Unit[unit]._Current,
477f752fed993e5e9423abac200dd59141edbada56Dave Airlie				1, (const GLfloat(*)[4]) texcoord,
487f752fed993e5e9423abac200dd59141edbada56Dave Airlie				&lambda, &rgba);
497f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[0] = CHAN_TO_FLOAT(rgba[0]);
507f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[1] = CHAN_TO_FLOAT(rgba[1]);
517f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[2] = CHAN_TO_FLOAT(rgba[2]);
527f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[3] = CHAN_TO_FLOAT(rgba[3]);
537f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
547f752fed993e5e9423abac200dd59141edbada56Dave Airlie
557f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
563c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheideggerapply_swizzle(GLfloat values[4], GLuint swizzle)
577f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
587f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat s, t, r, q;
597f752fed993e5e9423abac200dd59141edbada56Dave Airlie
603c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   s = values[0];
613c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   t = values[1];
623c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   r = values[2];
633c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   q = values[3];
647f752fed993e5e9423abac200dd59141edbada56Dave Airlie
657f752fed993e5e9423abac200dd59141edbada56Dave Airlie   switch (swizzle) {
667f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STR_ATI:
673c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s;
683c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t;
693c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = r;
707f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
717f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STQ_ATI:
723c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s;
733c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t;
743c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = q;
757f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
767f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STR_DR_ATI:
773c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s / r;
783c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t / r;
793c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = 1 / r;
807f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
817f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STQ_DQ_ATI:
823c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
833c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      if (q == 0.0F) q = 0.000000001;
843c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s / q;
853c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t / q;
863c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = 1 / q;
877f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
887f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
893c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   values[3] = 0.0;
907f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
917f752fed993e5e9423abac200dd59141edbada56Dave Airlie
927f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
937f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_src_rep(GLint optype, GLuint rep, GLfloat * val)
947f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
957f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
967f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
977f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (!rep)
987f752fed993e5e9423abac200dd59141edbada56Dave Airlie      return;
997f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1007f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
101b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger   end = 4;
1027f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1037f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1047f752fed993e5e9423abac200dd59141edbada56Dave Airlie      switch (rep) {
1057f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_RED:
1067f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[0];
1077f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1087f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_GREEN:
1097f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[1];
1107f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1117f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_BLUE:
1127f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[2];
1137f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1147f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_ALPHA:
1157f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[3];
1167f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1177f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1187f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1197f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1207f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1217f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1227f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_src_mod(GLint optype, GLuint mod, GLfloat * val)
1237f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1247f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1257f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1267f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1277f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (!mod)
1287f752fed993e5e9423abac200dd59141edbada56Dave Airlie      return;
1297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1307f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
131b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger   end = 4;
1327f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1337f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1347f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_COMP_BIT_ATI)
1357f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 1 - val[i];
1367f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1377f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_BIAS_BIT_ATI)
1387f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] - 0.5;
1397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1407f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_2X_BIT_ATI)
1417f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 2 * val[i];
1427f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1437f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_NEGATE_BIT_ATI)
1447f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = -val[i];
1457f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1467f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1477f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1487f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1497f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
1507f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1517f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1527f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
1537f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1547f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1557f752fed993e5e9423abac200dd59141edbada56Dave Airlie   mod &= ~GL_SATURATE_BIT_ATI;
1567f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1577f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
1587f752fed993e5e9423abac200dd59141edbada56Dave Airlie   end = optype ? 4 : 3;
1597f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1607f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1617f752fed993e5e9423abac200dd59141edbada56Dave Airlie      switch (mod) {
1627f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_2X_BIT_ATI:
1637f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 2 * val[i];
1647f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1657f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_4X_BIT_ATI:
1667f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 4 * val[i];
1677f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1687f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_8X_BIT_ATI:
1697f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 8 * val[i];
1707f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1717f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_HALF_BIT_ATI:
1727f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] * 0.5;
1737f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1747f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_QUARTER_BIT_ATI:
1757f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] * 0.25;
1767f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1777f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_EIGHTH_BIT_ATI:
1787f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] * 0.125;
1797f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1807f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1817f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1827f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (has_sat) {
1837f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (val[i] < 0.0)
1847f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = 0;
1857f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 else if (val[i] > 1.0)
1867f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = 1.0;
1877f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1887f752fed993e5e9423abac200dd59141edbada56Dave Airlie      else {
1897f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (val[i] < -8.0)
1907f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = -8.0;
1917f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 else if (val[i] > 8.0)
1927f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = 8.0;
1937f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1947f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1957f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1967f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1977f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1987f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1997f752fed993e5e9423abac200dd59141edbada56Dave Airliewrite_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
2007f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLfloat * dst)
2017f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2027f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
2037f752fed993e5e9423abac200dd59141edbada56Dave Airlie   apply_dst_mod(optype, mod, src);
2047f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2057f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
2067f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mask) {
2077f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_RED_BIT_ATI)
2087f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[0] = src[0];
2097f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2107f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_GREEN_BIT_ATI)
2117f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[1] = src[1];
2127f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2137f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_BLUE_BIT_ATI)
2147f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[2] = src[2];
2157f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2167f752fed993e5e9423abac200dd59141edbada56Dave Airlie      else {
2177f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (i = 0; i < 3; i++)
2187f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[i] = src[i];
2197f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2207f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2217f752fed993e5e9423abac200dd59141edbada56Dave Airlie   else
2227f752fed993e5e9423abac200dd59141edbada56Dave Airlie      dst[3] = src[3];
2237f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2247f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2256c58ddae17f63cd14b10d9022baee2ca6346fad2Brian Paulstatic void
2267f752fed993e5e9423abac200dd59141edbada56Dave Airliefinish_pass(struct atifs_machine *machine)
2277f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2287f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
2297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2307f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < 6; i++) {
2317f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
2327f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2337f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2347f752fed993e5e9423abac200dd59141edbada56Dave Airlie
/**
 * Execute the given fragment shader.  (This comment documents
 * execute_shader(), which is defined further down; it does not describe
 * the opcode table that immediately follows.)
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param shader - the fragment shader to execute
 * \param machine - machine state (register file)
 * \param maxInst - max number of instructions to execute
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
2457f752fed993e5e9423abac200dd59141edbada56Dave Airlie
/*
 * Table of the arithmetic opcodes, each paired with a small integer.
 * NOTE(review): the integer looks like the number of source arguments the
 * opcode takes (e.g. MOV 1, ADD 2, MAD 3) -- confirm against the
 * definition of struct ati_fs_opcode_st, which is not visible here.
 */
struct ati_fs_opcode_st ati_fs_opcodes[] = {
   {GL_ADD_ATI, 2},
   {GL_SUB_ATI, 2},
   {GL_MUL_ATI, 2},
   {GL_MAD_ATI, 3},
   {GL_LERP_ATI, 3},
   {GL_MOV_ATI, 1},
   {GL_CND_ATI, 3},
   {GL_CND0_ATI, 3},
   {GL_DOT2_ADD_ATI, 3},
   {GL_DOT3_ATI, 2},
   {GL_DOT4_ATI, 2}
};
2597f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2607f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2617f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2627f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
263f519a770d074dac9e188e3b450c828510506c46dRoland Scheideggerhandle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
264f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       const struct sw_span *span, GLuint column, GLuint idx)
2657f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
266f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint swizzle = texinst->swizzle;
267f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint pass_tex = texinst->src;
268f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger
2697f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
2707f752fed993e5e9423abac200dd59141edbada56Dave Airlie      pass_tex -= GL_TEXTURE0_ARB;
2717f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->Registers[idx],
2727f752fed993e5e9423abac200dd59141edbada56Dave Airlie	      span->array->texcoords[pass_tex][column]);
2737f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2743c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
2757f752fed993e5e9423abac200dd59141edbada56Dave Airlie      pass_tex -= GL_REG_0_ATI;
2767f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
2777f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2783c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   apply_swizzle(machine->Registers[idx], swizzle);
2797f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2807f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2817f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2827f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
2837f752fed993e5e9423abac200dd59141edbada56Dave Airliehandle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
284f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		 struct atifs_setupinst *texinst, const struct sw_span *span,
285f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		 GLuint column, GLuint idx)
2867f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2873c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger/* sample from unit idx using texinst->src as coords */
288f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint swizzle = texinst->swizzle;
2893c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   GLuint coord_source = texinst->src;
2903c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   GLfloat tex_coords[4];
2917f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2923c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
2933c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      coord_source -= GL_TEXTURE0_ARB;
2943c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      COPY_4V(tex_coords, span->array->texcoords[coord_source][column]);
2957f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2963c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
2973c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      coord_source -= GL_REG_0_ATI;
2983c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
2997f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
3003c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   apply_swizzle(tex_coords, swizzle);
3013c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
3027f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
3037f752fed993e5e9423abac200dd59141edbada56Dave Airlie
/*
 * Load source argument slot i of the given op type from x by invoking
 * COPY_4V(src[optype][i], x).  The macro refers to an array named `src`
 * that must be in scope wherever it is expanded (execute_shader declares
 * it as GLfloat src[2][3][4]).
 */
#define SETUP_SRC_REG(optype, i, x)		\
do {						\
   COPY_4V(src[optype][i], x); 			\
} while (0)
3087f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3097f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic GLboolean
3107f752fed993e5e9423abac200dd59141edbada56Dave Airlieexecute_shader(GLcontext * ctx,
3117f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       const struct ati_fragment_shader *shader, GLuint maxInst,
3127f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       struct atifs_machine *machine, const struct sw_span *span,
3137f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLuint column)
3147f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
3157f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLuint pc;
3167f752fed993e5e9423abac200dd59141edbada56Dave Airlie   struct atifs_instruction *inst;
317f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   struct atifs_setupinst *texinst;
3187f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint optype;
319f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLint i, j, pass;
3207f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint dstreg;
3217f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat src[2][3][4];
3227f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
3237f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
3247f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat dst[2][4], *dstp;
3257f752fed993e5e9423abac200dd59141edbada56Dave Airlie
326f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   for (pass = 0; pass < shader->NumPasses; pass++) {
327f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      if (pass > 0)
328f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 finish_pass(machine);
329f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
330f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 texinst = &shader->SetupInst[pass][j];
331f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
332f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	    handle_pass_op(machine, texinst, span, column, j);
333f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
334f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	    handle_sample_op(ctx, machine, texinst, span, column, j);
335f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      }
3367f752fed993e5e9423abac200dd59141edbada56Dave Airlie
337f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
338f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 inst = &shader->Instructions[pass][pc];
3397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3407f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* setup the source registers for color and alpha ops */
3417f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
3427f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    for (i = 0; i < inst->ArgCount[optype]; i++) {
3437f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLint index = inst->SrcReg[optype][i].Index;
3447f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3457f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
3467f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3477f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Registers[index - GL_REG_0_ATI]);
348f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
349f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  if (shader->localConstDef & (1 << (index - GL_CON_0_ATI))) {
350f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		     SETUP_SRC_REG(optype, i,
3517f752fed993e5e9423abac200dd59141edbada56Dave Airlie				shader->Constants[index - GL_CON_0_ATI]);
352f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  } else {
353f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		     SETUP_SRC_REG(optype, i,
354f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger				ctx->ATIFragmentShader.globalConstants[index - GL_CON_0_ATI]);
355f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  }
356f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       }
3577f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_ONE)
3587f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i, ones);
3597f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_ZERO)
3607f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i, zeros);
3617f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_PRIMARY_COLOR_EXT)
3627f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3637f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
3647f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
3657f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3667f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
3677f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3687f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
3697f752fed993e5e9423abac200dd59141edbada56Dave Airlie			     src[optype][i]);
3707f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
3717f752fed993e5e9423abac200dd59141edbada56Dave Airlie			     src[optype][i]);
3727f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
3737f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
3747f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3757f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* Execute the operations - color then alpha */
3767f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
3777f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    if (inst->Opcode[optype]) {
3787f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       switch (inst->Opcode[optype]) {
3797f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_ADD_ATI:
3807f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
3817f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
3827f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
3837f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] + src[optype][1][i];
3847f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
3857f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
3867f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
3877f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
3887f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_SUB_ATI:
3897f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
3907f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
3917f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
3927f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] - src[optype][1][i];
3937f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
3947f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
3957f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
3967f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
3977f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MUL_ATI:
3987f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
3997f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4007f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4017f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i];
4027f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4037f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4047f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
4057f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4067f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MAD_ATI:
4077f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4087f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4097f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4107f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i] +
4117f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][2][i];
4127f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4137f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4147f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4157f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][0][3] * src[optype][1][3] +
4167f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][2][3];
4177f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4187f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_LERP_ATI:
4197f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4207f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4217f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4227f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i] + (1 -
4237f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    src
4247f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    [optype]
4257f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    [0][i]) *
4267f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][2][i];
4277f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4287f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4297f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4307f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][0][3] * src[optype][1][3] + (1 -
4317f752fed993e5e9423abac200dd59141edbada56Dave Airlie								 src[optype]
4327f752fed993e5e9423abac200dd59141edbada56Dave Airlie								 [0][3]) *
4337f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][2][3];
4347f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4357f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4367f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MOV_ATI:
4377f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4387f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4397f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] = src[optype][0][i];
4407f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4417f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4427f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3];
4437f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4447f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_CND_ATI:
4457f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype) {
4467f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4477f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4487f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   (src[optype][2][i] >
4497f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    0.5) ? src[optype][0][i] : src[optype][1][i];
4507f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4517f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4527f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else {
4537f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4547f752fed993e5e9423abac200dd59141edbada56Dave Airlie			(src[optype][2][3] >
4557f752fed993e5e9423abac200dd59141edbada56Dave Airlie			 0.5) ? src[optype][0][3] : src[optype][1][3];
4567f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4577f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4587f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4597f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_CND0_ATI:
4607f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4617f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4627f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4637f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   (src[optype][2][i] >=
4647f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    0) ? src[optype][0][i] : src[optype][1][i];
4657f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4667f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else {
4677f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4687f752fed993e5e9423abac200dd59141edbada56Dave Airlie			(src[optype][2][3] >=
4697f752fed993e5e9423abac200dd59141edbada56Dave Airlie			 0) ? src[optype][0][3] : src[optype][1][3];
4707f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4717f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4727f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT2_ADD_ATI:
4737f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
4747f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
4757f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4767f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 2 always uses the source from the color op */
477b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		     /* could save recalculation of dot products for alpha inst */
4787f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     result = src[0][0][0] * src[0][1][0] +
4797f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] + src[0][2][2];
4807f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
4817f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
4827f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
4837f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
4847f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4857f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
4867f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
4877f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4887f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4897f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT3_ATI:
4907f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
4917f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
4927f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4937f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 3 always uses the source from the color op */
4947f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     result = src[0][0][0] * src[0][1][0] +
4957f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] +
4967f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][2] * src[0][1][2];
4977f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4987f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
4997f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
5007f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
5017f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
5027f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
5037f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
5047f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
5057f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
5067f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
5077f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT4_ATI:
5087f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
5097f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
5107f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5117f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 4 always uses the source from the color op */
512b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		     result = src[0][0][0] * src[0][1][0] +
5137f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] +
5147f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][2] * src[0][1][2] +
5157f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][3] * src[0][1][3];
5167f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
5177f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
5187f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
5197f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
5207f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
5217f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
5227f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
5237f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
5247f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
5257f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5267f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       }
5277f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
5287f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
5297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5307f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* write out the destination registers */
5317f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
5327f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    if (inst->Opcode[optype]) {
5337f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       dstreg = inst->DstReg[optype].Index;
5347f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
5357f752fed993e5e9423abac200dd59141edbada56Dave Airlie
536b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
537b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
538b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
5397f752fed993e5e9423abac200dd59141edbada56Dave Airlie			      inst->DstReg[optype].dstMask, dst[optype],
5407f752fed993e5e9423abac200dd59141edbada56Dave Airlie			      dstp);
541b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	       else
542b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
5437f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
5447f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
5457f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
5467f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
5477f752fed993e5e9423abac200dd59141edbada56Dave Airlie   return GL_TRUE;
5487f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
5497f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5507f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
5517f752fed993e5e9423abac200dd59141edbada56Dave Airlieinit_machine(GLcontext * ctx, struct atifs_machine *machine,
5527f752fed993e5e9423abac200dd59141edbada56Dave Airlie	     const struct ati_fragment_shader *shader,
5537f752fed993e5e9423abac200dd59141edbada56Dave Airlie	     const struct sw_span *span, GLuint col)
5547f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
5557f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i, j;
5567f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5577f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < 6; i++) {
5587f752fed993e5e9423abac200dd59141edbada56Dave Airlie      for (j = 0; j < 4; j++)
5597f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 ctx->ATIFragmentShader.Machine.Registers[i][j] = 0.0;
5607f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5617f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
5627f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5637f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][0] =
5647f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->rgba[col][0]);
5657f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][1] =
5667f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->rgba[col][1]);
5677f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][2] =
5687f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->rgba[col][2]);
5697f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][3] =
5707f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->rgba[col][3]);
5717f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5727f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][0] =
5737f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->spec[col][0]);
5747f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][1] =
5757f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->spec[col][1]);
5767f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][2] =
5777f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->spec[col][2]);
5787f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][3] =
5797f752fed993e5e9423abac200dd59141edbada56Dave Airlie      CHAN_TO_FLOAT(span->array->spec[col][3]);
5807f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5817f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->ATIFragmentShader.Machine.pass = 0;
5827f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
5837f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5847f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5857f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5867f752fed993e5e9423abac200dd59141edbada56Dave Airlie/**
5877f752fed993e5e9423abac200dd59141edbada56Dave Airlie * Execute the current fragment program, operating on the given span.
5887f752fed993e5e9423abac200dd59141edbada56Dave Airlie */
5897f752fed993e5e9423abac200dd59141edbada56Dave Airlievoid
5907f752fed993e5e9423abac200dd59141edbada56Dave Airlie_swrast_exec_fragment_shader(GLcontext * ctx, struct sw_span *span)
5917f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
5927f752fed993e5e9423abac200dd59141edbada56Dave Airlie   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
5937f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLuint i;
5947f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5957f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
5967f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5977f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < span->end; i++) {
5987f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (span->array->mask[i]) {
5997f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 init_machine(ctx, &ctx->ATIFragmentShader.Machine,
6007f752fed993e5e9423abac200dd59141edbada56Dave Airlie		      ctx->ATIFragmentShader.Current, span, i);
6017f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6027f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (execute_shader(ctx, shader, ~0,
6037f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    &ctx->ATIFragmentShader.Machine, span, i)) {
6047f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    span->array->mask[i] = GL_FALSE;
6057f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
6067f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6077f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 {
6087f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    const GLfloat *colOut =
6097f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       ctx->ATIFragmentShader.Machine.Registers[0];
6107f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6117f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    /*fprintf(stderr,"outputs %f %f %f %f\n", colOut[0], colOut[1], colOut[2], colOut[3]); */
6127f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
6137f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
6147f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
6157f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
6167f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
6177f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
6187f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6197f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
6207f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6217f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6227f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->_CurrentProgram = 0;
6237f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6247f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
625