s_atifragshader.c revision 7e85b0a025a82c3ffed060a757a3b4adae03d269
/*
 *
 * Copyright (C) 2004  David Airlie   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
227f752fed993e5e9423abac200dd59141edbada56Dave Airlie
237f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "glheader.h"
247f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "colormac.h"
257f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "context.h"
267f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "atifragshader.h"
277f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "macros.h"
287f752fed993e5e9423abac200dd59141edbada56Dave Airlie#include "program.h"
297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
306c58ddae17f63cd14b10d9022baee2ca6346fad2Brian Paul#include "s_atifragshader.h"
3155187ea63e980b32c7a701855571332f4357d634Brian Paul
327f752fed993e5e9423abac200dd59141edbada56Dave Airlie
337f752fed993e5e9423abac200dd59141edbada56Dave Airlie/**
34c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul * State for executing ATI fragment shader.
35c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul */
36c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paulstruct atifs_machine
37c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul{
38c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat Registers[6][4];         /** six temporary registers */
39c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat PrevPassRegisters[6][4];
40c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
41c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul};
42c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
43c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
44c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
45c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul/**
467f752fed993e5e9423abac200dd59141edbada56Dave Airlie * Fetch a texel.
477f752fed993e5e9423abac200dd59141edbada56Dave Airlie */
487f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
497f752fed993e5e9423abac200dd59141edbada56Dave Airliefetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
507f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    GLuint unit, GLfloat color[4])
517f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
527f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLchan rgba[4];
537f752fed993e5e9423abac200dd59141edbada56Dave Airlie   SWcontext *swrast = SWRAST_CONTEXT(ctx);
547f752fed993e5e9423abac200dd59141edbada56Dave Airlie
557f752fed993e5e9423abac200dd59141edbada56Dave Airlie   /* XXX use a float-valued TextureSample routine here!!! */
56aa8abf8081023c00469b6c88760ed0291033eb6eBrian Paul   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
57aa8abf8081023c00469b6c88760ed0291033eb6eBrian Paul                               1, (const GLfloat(*)[4]) texcoord,
58aa8abf8081023c00469b6c88760ed0291033eb6eBrian Paul                               &lambda, &rgba);
597f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[0] = CHAN_TO_FLOAT(rgba[0]);
607f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[1] = CHAN_TO_FLOAT(rgba[1]);
617f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[2] = CHAN_TO_FLOAT(rgba[2]);
627f752fed993e5e9423abac200dd59141edbada56Dave Airlie   color[3] = CHAN_TO_FLOAT(rgba[3]);
637f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
647f752fed993e5e9423abac200dd59141edbada56Dave Airlie
657f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
663c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheideggerapply_swizzle(GLfloat values[4], GLuint swizzle)
677f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
687f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat s, t, r, q;
697f752fed993e5e9423abac200dd59141edbada56Dave Airlie
703c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   s = values[0];
713c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   t = values[1];
723c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   r = values[2];
733c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   q = values[3];
747f752fed993e5e9423abac200dd59141edbada56Dave Airlie
757f752fed993e5e9423abac200dd59141edbada56Dave Airlie   switch (swizzle) {
767f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STR_ATI:
773c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s;
783c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t;
793c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = r;
807f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
817f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STQ_ATI:
823c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s;
833c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t;
843c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = q;
857f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
867f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STR_DR_ATI:
873c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s / r;
883c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t / r;
893c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = 1 / r;
907f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
917f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STQ_DQ_ATI:
923c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
933c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      if (q == 0.0F) q = 0.000000001;
943c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s / q;
953c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t / q;
963c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = 1 / q;
977f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
987f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
993c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   values[3] = 0.0;
1007f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1017f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1027f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1037f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_src_rep(GLint optype, GLuint rep, GLfloat * val)
1047f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1057f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1067f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1077f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (!rep)
1087f752fed993e5e9423abac200dd59141edbada56Dave Airlie      return;
1097f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1107f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
111b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger   end = 4;
1127f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1137f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1147f752fed993e5e9423abac200dd59141edbada56Dave Airlie      switch (rep) {
1157f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_RED:
1167f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[0];
1177f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1187f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_GREEN:
1197f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[1];
1207f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1217f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_BLUE:
1227f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[2];
1237f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1247f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_ALPHA:
1257f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[3];
1267f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1277f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1287f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1297f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1307f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1317f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1327f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_src_mod(GLint optype, GLuint mod, GLfloat * val)
1337f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1347f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1357f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1367f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1377f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (!mod)
1387f752fed993e5e9423abac200dd59141edbada56Dave Airlie      return;
1397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1407f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
141b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger   end = 4;
1427f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1437f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1447f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_COMP_BIT_ATI)
1457f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 1 - val[i];
1467f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1477f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_BIAS_BIT_ATI)
1487f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] - 0.5;
1497f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1507f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_2X_BIT_ATI)
1517f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 2 * val[i];
1527f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1537f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_NEGATE_BIT_ATI)
1547f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = -val[i];
1557f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1567f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1577f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1587f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1597f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
1607f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1617f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1627f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
1637f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1647f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1657f752fed993e5e9423abac200dd59141edbada56Dave Airlie   mod &= ~GL_SATURATE_BIT_ATI;
1667f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1677f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
1687f752fed993e5e9423abac200dd59141edbada56Dave Airlie   end = optype ? 4 : 3;
1697f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1707f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1717f752fed993e5e9423abac200dd59141edbada56Dave Airlie      switch (mod) {
1727f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_2X_BIT_ATI:
1737f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 2 * val[i];
1747f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1757f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_4X_BIT_ATI:
1767f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 4 * val[i];
1777f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1787f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_8X_BIT_ATI:
1797f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 8 * val[i];
1807f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1817f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_HALF_BIT_ATI:
1827f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] * 0.5;
1837f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1847f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_QUARTER_BIT_ATI:
1857f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] * 0.25;
1867f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1877f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_EIGHTH_BIT_ATI:
1887f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[i] * 0.125;
1897f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1907f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1917f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1927f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (has_sat) {
1937f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (val[i] < 0.0)
1947f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = 0;
1957f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 else if (val[i] > 1.0)
1967f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = 1.0;
1977f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1987f752fed993e5e9423abac200dd59141edbada56Dave Airlie      else {
1997f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (val[i] < -8.0)
2007f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = -8.0;
2017f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 else if (val[i] > 8.0)
2027f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    val[i] = 8.0;
2037f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2047f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2057f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2067f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2077f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2087f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
2097f752fed993e5e9423abac200dd59141edbada56Dave Airliewrite_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
2107f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLfloat * dst)
2117f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2127f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
2137f752fed993e5e9423abac200dd59141edbada56Dave Airlie   apply_dst_mod(optype, mod, src);
2147f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2157f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
2167f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mask) {
2177f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_RED_BIT_ATI)
2187f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[0] = src[0];
2197f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2207f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_GREEN_BIT_ATI)
2217f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[1] = src[1];
2227f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2237f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_BLUE_BIT_ATI)
2247f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[2] = src[2];
2257f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2267f752fed993e5e9423abac200dd59141edbada56Dave Airlie      else {
2277f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (i = 0; i < 3; i++)
2287f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[i] = src[i];
2297f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2307f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2317f752fed993e5e9423abac200dd59141edbada56Dave Airlie   else
2327f752fed993e5e9423abac200dd59141edbada56Dave Airlie      dst[3] = src[3];
2337f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2347f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2356c58ddae17f63cd14b10d9022baee2ca6346fad2Brian Paulstatic void
2367f752fed993e5e9423abac200dd59141edbada56Dave Airliefinish_pass(struct atifs_machine *machine)
2377f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2387f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
2397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2407f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < 6; i++) {
2417f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
2427f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2437f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2447f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2457f752fed993e5e9423abac200dd59141edbada56Dave Airliestruct ati_fs_opcode_st ati_fs_opcodes[] = {
2467f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_ADD_ATI, 2},
2477f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_SUB_ATI, 2},
2487f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_MUL_ATI, 2},
2497f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_MAD_ATI, 3},
2507f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_LERP_ATI, 3},
2517f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_MOV_ATI, 1},
2527f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_CND_ATI, 3},
2537f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_CND0_ATI, 3},
2547f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_DOT2_ADD_ATI, 3},
2557f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_DOT3_ATI, 2},
2567f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_DOT4_ATI, 2}
2577f752fed993e5e9423abac200dd59141edbada56Dave Airlie};
2587f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2597f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2607f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2617f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
262f519a770d074dac9e188e3b450c828510506c46dRoland Scheideggerhandle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
263cdb27e8242215271364602995d85607cfc06d441Brian Paul	       const SWspan *span, GLuint column, GLuint idx)
2647f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
265f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint swizzle = texinst->swizzle;
266f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint pass_tex = texinst->src;
267f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger
2687f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
2697f752fed993e5e9423abac200dd59141edbada56Dave Airlie      pass_tex -= GL_TEXTURE0_ARB;
2707f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->Registers[idx],
2717f752fed993e5e9423abac200dd59141edbada56Dave Airlie	      span->array->texcoords[pass_tex][column]);
2727f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2733c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
2747f752fed993e5e9423abac200dd59141edbada56Dave Airlie      pass_tex -= GL_REG_0_ATI;
2757f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
2767f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2773c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   apply_swizzle(machine->Registers[idx], swizzle);
2787f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2797f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2807f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2817f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
2827f752fed993e5e9423abac200dd59141edbada56Dave Airliehandle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
283cdb27e8242215271364602995d85607cfc06d441Brian Paul		 struct atifs_setupinst *texinst, const SWspan *span,
284f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		 GLuint column, GLuint idx)
2857f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2863c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger/* sample from unit idx using texinst->src as coords */
287f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint swizzle = texinst->swizzle;
2883c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   GLuint coord_source = texinst->src;
2893c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   GLfloat tex_coords[4];
2907f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2913c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
2923c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      coord_source -= GL_TEXTURE0_ARB;
2933c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      COPY_4V(tex_coords, span->array->texcoords[coord_source][column]);
2947f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2953c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
2963c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      coord_source -= GL_REG_0_ATI;
2973c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
2987f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2993c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   apply_swizzle(tex_coords, swizzle);
3003c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
3017f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
3027f752fed993e5e9423abac200dd59141edbada56Dave Airlie
/**
 * Copy the four-component vector \p x into src[optype][i].
 * Expands to a reference to an array named "src", which must be in scope
 * where the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x) \
   do { COPY_4V(src[optype][i], x); } while (0)
3077f752fed993e5e9423abac200dd59141edbada56Dave Airlie
308919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul
309919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul
310919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul/**
311919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * Execute the given fragment shader.
312919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * NOTE: we do everything in single-precision floating point
313919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param ctx - rendering context
314919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param shader - the shader to execute
315919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param machine - virtual machine state
316919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param span - the SWspan we're operating on
317919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param column - which pixel [i] we're operating on in the span
318919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul */
319919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paulstatic void
320919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paulexecute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
321cdb27e8242215271364602995d85607cfc06d441Brian Paul	       struct atifs_machine *machine, const SWspan *span,
322919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul               GLuint column)
3237f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
3247f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLuint pc;
3257f752fed993e5e9423abac200dd59141edbada56Dave Airlie   struct atifs_instruction *inst;
326f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   struct atifs_setupinst *texinst;
3277f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint optype;
3287e85b0a025a82c3ffed060a757a3b4adae03d269Brian   GLuint i;
3297e85b0a025a82c3ffed060a757a3b4adae03d269Brian   GLint j, pass;
3307f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint dstreg;
3317f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat src[2][3][4];
3327f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
3337f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
3347f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat dst[2][4], *dstp;
3357f752fed993e5e9423abac200dd59141edbada56Dave Airlie
336f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   for (pass = 0; pass < shader->NumPasses; pass++) {
337f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      if (pass > 0)
338f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 finish_pass(machine);
339f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
340f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 texinst = &shader->SetupInst[pass][j];
341f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
342f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	    handle_pass_op(machine, texinst, span, column, j);
343f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
344f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	    handle_sample_op(ctx, machine, texinst, span, column, j);
345f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      }
3467f752fed993e5e9423abac200dd59141edbada56Dave Airlie
347f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
348f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 inst = &shader->Instructions[pass][pc];
3497f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3507f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* setup the source registers for color and alpha ops */
3517f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
3527e85b0a025a82c3ffed060a757a3b4adae03d269Brian 	    for (i = 0; i < inst->ArgCount[optype]; i++) {
3537f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLint index = inst->SrcReg[optype][i].Index;
3547f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3557f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
3567f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3577f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Registers[index - GL_REG_0_ATI]);
358f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
35963d683091fe3a9600b65ae7ef3b554168b805406Brian Paul		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
360f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		     SETUP_SRC_REG(optype, i,
3617f752fed993e5e9423abac200dd59141edbada56Dave Airlie				shader->Constants[index - GL_CON_0_ATI]);
362f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  } else {
363f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		     SETUP_SRC_REG(optype, i,
36463d683091fe3a9600b65ae7ef3b554168b805406Brian Paul				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
365f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  }
366f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       }
3677f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_ONE)
3687f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i, ones);
3697f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_ZERO)
3707f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i, zeros);
3717f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_PRIMARY_COLOR_EXT)
3727f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3737f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
3747f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
3757f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3767f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
3777f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3787f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
3797f752fed993e5e9423abac200dd59141edbada56Dave Airlie			     src[optype][i]);
3807f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
3817f752fed993e5e9423abac200dd59141edbada56Dave Airlie			     src[optype][i]);
3827f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
3837f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
3847f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3857f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* Execute the operations - color then alpha */
3867f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
3877f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    if (inst->Opcode[optype]) {
3887f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       switch (inst->Opcode[optype]) {
3897f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_ADD_ATI:
3907f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
3917f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
3927f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
3937f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] + src[optype][1][i];
3947f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
3957f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
3967f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
3977f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
3987f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_SUB_ATI:
3997f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4007f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4017f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4027f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] - src[optype][1][i];
4037f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4047f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4057f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
4067f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4077f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MUL_ATI:
4087f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4097f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4107f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4117f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i];
4127f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4137f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4147f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
4157f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4167f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MAD_ATI:
4177f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4187f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4197f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4207f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i] +
4217f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][2][i];
4227f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4237f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4247f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4257f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][0][3] * src[optype][1][3] +
4267f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][2][3];
4277f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4287f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_LERP_ATI:
4297f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4307f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4317f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4327f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i] + (1 -
4337f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    src
4347f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    [optype]
4357f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    [0][i]) *
4367f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][2][i];
4377f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4387f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4397f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4407f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][0][3] * src[optype][1][3] + (1 -
4417f752fed993e5e9423abac200dd59141edbada56Dave Airlie								 src[optype]
4427f752fed993e5e9423abac200dd59141edbada56Dave Airlie								 [0][3]) *
4437f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][2][3];
4447f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4457f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4467f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MOV_ATI:
4477f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4487f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4497f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] = src[optype][0][i];
4507f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4517f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4527f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3];
4537f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4547f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_CND_ATI:
4557f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype) {
4567f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4577f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4587f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   (src[optype][2][i] >
4597f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    0.5) ? src[optype][0][i] : src[optype][1][i];
4607f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4617f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4627f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else {
4637f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4647f752fed993e5e9423abac200dd59141edbada56Dave Airlie			(src[optype][2][3] >
4657f752fed993e5e9423abac200dd59141edbada56Dave Airlie			 0.5) ? src[optype][0][3] : src[optype][1][3];
4667f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4677f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4687f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4697f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_CND0_ATI:
4707f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4717f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4727f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4737f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   (src[optype][2][i] >=
4747f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    0) ? src[optype][0][i] : src[optype][1][i];
4757f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4767f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else {
4777f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4787f752fed993e5e9423abac200dd59141edbada56Dave Airlie			(src[optype][2][3] >=
4797f752fed993e5e9423abac200dd59141edbada56Dave Airlie			 0) ? src[optype][0][3] : src[optype][1][3];
4807f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4817f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4827f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT2_ADD_ATI:
4837f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
4847f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
4857f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4867f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 2 always uses the source from the color op */
487b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		     /* could save recalculation of dot products for alpha inst */
4887f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     result = src[0][0][0] * src[0][1][0] +
4897f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] + src[0][2][2];
4907f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
4917f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
4927f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
4937f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
4947f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4957f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
4967f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
4977f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4987f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4997f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT3_ATI:
5007f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
5017f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
5027f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5037f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 3 always uses the source from the color op */
5047f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     result = src[0][0][0] * src[0][1][0] +
5057f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] +
5067f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][2] * src[0][1][2];
5077f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5087f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
5097f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
5107f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
5117f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
5127f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
5137f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
5147f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
5157f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
5167f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
5177f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT4_ATI:
5187f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
5197f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
5207f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5217f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 4 always uses the source from the color op */
522b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		     result = src[0][0][0] * src[0][1][0] +
5237f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] +
5247f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][2] * src[0][1][2] +
5257f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][3] * src[0][1][3];
5267f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
5277f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
5287f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
5297f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
5307f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
5317f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
5327f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
5337f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
5347f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
5357f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5367f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       }
5377f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
5387f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
5397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5407f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* write out the destination registers */
5417f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
5427f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    if (inst->Opcode[optype]) {
5437f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       dstreg = inst->DstReg[optype].Index;
5447f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
5457f752fed993e5e9423abac200dd59141edbada56Dave Airlie
546b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
547b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
548b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
5497f752fed993e5e9423abac200dd59141edbada56Dave Airlie			      inst->DstReg[optype].dstMask, dst[optype],
5507f752fed993e5e9423abac200dd59141edbada56Dave Airlie			      dstp);
551b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	       else
552b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
5537f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
5547f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
5557f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
5567f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
5577f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
5587f752fed993e5e9423abac200dd59141edbada56Dave Airlie
559c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
560c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul/**
561c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul * Init fragment shader virtual machine state.
562c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul */
5637f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
5647f752fed993e5e9423abac200dd59141edbada56Dave Airlieinit_machine(GLcontext * ctx, struct atifs_machine *machine,
5657f752fed993e5e9423abac200dd59141edbada56Dave Airlie	     const struct ati_fragment_shader *shader,
566cdb27e8242215271364602995d85607cfc06d441Brian Paul	     const SWspan *span, GLuint col)
5677f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
568c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat (*inputs)[4] = machine->Inputs;
5697f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i, j;
5707f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5717f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < 6; i++) {
5727f752fed993e5e9423abac200dd59141edbada56Dave Airlie      for (j = 0; j < 4; j++)
573c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul	 machine->Registers[i][j] = 0.0;
5747f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
5757f752fed993e5e9423abac200dd59141edbada56Dave Airlie
576c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->color.sz4.rgba[col]);
577c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->color.sz4.spec[col]);
5787f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
5797f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5807f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5817f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5827f752fed993e5e9423abac200dd59141edbada56Dave Airlie/**
583c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul * Execute the current ATI shader program, operating on the given span.
5847f752fed993e5e9423abac200dd59141edbada56Dave Airlie */
5857f752fed993e5e9423abac200dd59141edbada56Dave Airlievoid
586cdb27e8242215271364602995d85607cfc06d441Brian Paul_swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
5877f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
5887f752fed993e5e9423abac200dd59141edbada56Dave Airlie   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
589c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   struct atifs_machine machine;
5907f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLuint i;
5917f752fed993e5e9423abac200dd59141edbada56Dave Airlie
592c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul   /* incoming colors should be floats */
593c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul   ASSERT(span->array->ChanType == GL_FLOAT);
594c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul
5957f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
5967f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5977f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < span->end; i++) {
5987f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (span->array->mask[i]) {
599c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul	 init_machine(ctx, &machine, shader, span, i);
600919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul
601919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul	 execute_shader(ctx, shader, &machine, span, i);
6027f752fed993e5e9423abac200dd59141edbada56Dave Airlie
603c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul         /* store result color */
6047f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 {
605c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul	    const GLfloat *colOut = machine.Registers[0];
606c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul            /*fprintf(stderr,"outputs %f %f %f %f\n",
607c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul              colOut[0], colOut[1], colOut[2], colOut[3]); */
608c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul            COPY_4V(span->array->color.sz4.rgba[i], colOut);
6097f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
6107f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
6117f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
6127f752fed993e5e9423abac200dd59141edbada56Dave Airlie
6137f752fed993e5e9423abac200dd59141edbada56Dave Airlie   ctx->_CurrentProgram = 0;
6147f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
615