s_atifragshader.c revision b30898f4ab533085d97a33638ad0a1cf9ddb1d67
/*
 * Copyright (C) 2004  David Airlie   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
217f752fed993e5e9423abac200dd59141edbada56Dave Airlie
22bbd287103dad776d8a45c87c4e51fbc26d9b80d5Brian Paul#include "main/glheader.h"
23bbd287103dad776d8a45c87c4e51fbc26d9b80d5Brian Paul#include "main/colormac.h"
24bbd287103dad776d8a45c87c4e51fbc26d9b80d5Brian Paul#include "main/context.h"
25bbd287103dad776d8a45c87c4e51fbc26d9b80d5Brian Paul#include "main/macros.h"
26c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "shader/atifragshader.h"
27c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "swrast/s_atifragshader.h"
2855187ea63e980b32c7a701855571332f4357d634Brian Paul
297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
307f752fed993e5e9423abac200dd59141edbada56Dave Airlie/**
31c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul * State for executing ATI fragment shader.
32c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul */
33c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paulstruct atifs_machine
34c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul{
35c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat Registers[6][4];         /** six temporary registers */
36c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat PrevPassRegisters[6][4];
37c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
38c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul};
39c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
40c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
41c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
42c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul/**
437f752fed993e5e9423abac200dd59141edbada56Dave Airlie * Fetch a texel.
447f752fed993e5e9423abac200dd59141edbada56Dave Airlie */
457f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
467f752fed993e5e9423abac200dd59141edbada56Dave Airliefetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
477f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    GLuint unit, GLfloat color[4])
487f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
497f752fed993e5e9423abac200dd59141edbada56Dave Airlie   SWcontext *swrast = SWRAST_CONTEXT(ctx);
507f752fed993e5e9423abac200dd59141edbada56Dave Airlie
517f752fed993e5e9423abac200dd59141edbada56Dave Airlie   /* XXX use a float-valued TextureSample routine here!!! */
52aa8abf8081023c00469b6c88760ed0291033eb6eBrian Paul   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
53aa8abf8081023c00469b6c88760ed0291033eb6eBrian Paul                               1, (const GLfloat(*)[4]) texcoord,
54de2afd8688ceb45013d15be7c6e0995199b80e5aBrian Paul                               &lambda, (GLfloat (*)[4]) color);
557f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
567f752fed993e5e9423abac200dd59141edbada56Dave Airlie
577f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
583c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheideggerapply_swizzle(GLfloat values[4], GLuint swizzle)
597f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
607f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat s, t, r, q;
617f752fed993e5e9423abac200dd59141edbada56Dave Airlie
623c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   s = values[0];
633c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   t = values[1];
643c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   r = values[2];
653c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   q = values[3];
667f752fed993e5e9423abac200dd59141edbada56Dave Airlie
677f752fed993e5e9423abac200dd59141edbada56Dave Airlie   switch (swizzle) {
687f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STR_ATI:
693c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s;
703c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t;
713c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = r;
727f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
737f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STQ_ATI:
743c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s;
753c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t;
763c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = q;
777f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
787f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STR_DR_ATI:
793c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s / r;
803c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t / r;
813c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[2] = 1 / r;
827f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
837f752fed993e5e9423abac200dd59141edbada56Dave Airlie   case GL_SWIZZLE_STQ_DQ_ATI:
843c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
85880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul      if (q == 0.0F)
86880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul         q = 0.000000001F;
873c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[0] = s / q;
883c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      values[1] = t / q;
89b30898f4ab533085d97a33638ad0a1cf9ddb1d67Karl Schultz      values[2] = 1.0F / q;
907f752fed993e5e9423abac200dd59141edbada56Dave Airlie      break;
917f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
923c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   values[3] = 0.0;
937f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
947f752fed993e5e9423abac200dd59141edbada56Dave Airlie
957f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
967f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_src_rep(GLint optype, GLuint rep, GLfloat * val)
977f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
987f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
997f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1007f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (!rep)
1017f752fed993e5e9423abac200dd59141edbada56Dave Airlie      return;
1027f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1037f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
104b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger   end = 4;
1057f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1067f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1077f752fed993e5e9423abac200dd59141edbada56Dave Airlie      switch (rep) {
1087f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_RED:
1097f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[0];
1107f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1117f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_GREEN:
1127f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[1];
1137f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1147f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_BLUE:
1157f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[2];
1167f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1177f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_ALPHA:
1187f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = val[3];
1197f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1207f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1217f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1227f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1237f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1247f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1257f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_src_mod(GLint optype, GLuint mod, GLfloat * val)
1267f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1277f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1287f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1307f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (!mod)
1317f752fed993e5e9423abac200dd59141edbada56Dave Airlie      return;
1327f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1337f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
134b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger   end = 4;
1357f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1367f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1377f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_COMP_BIT_ATI)
1387f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 1 - val[i];
1397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1407f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_BIAS_BIT_ATI)
141b30898f4ab533085d97a33638ad0a1cf9ddb1d67Karl Schultz	 val[i] = val[i] - 0.5F;
1427f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1437f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_2X_BIT_ATI)
1447f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 2 * val[i];
1457f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1467f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mod & GL_NEGATE_BIT_ATI)
1477f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = -val[i];
1487f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1497f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1507f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1517f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
1527f752fed993e5e9423abac200dd59141edbada56Dave Airlieapply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
1537f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
1547f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
1557f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
1567f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint start, end;
1577f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1587f752fed993e5e9423abac200dd59141edbada56Dave Airlie   mod &= ~GL_SATURATE_BIT_ATI;
1597f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1607f752fed993e5e9423abac200dd59141edbada56Dave Airlie   start = optype ? 3 : 0;
1617f752fed993e5e9423abac200dd59141edbada56Dave Airlie   end = optype ? 4 : 3;
1627f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1637f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = start; i < end; i++) {
1647f752fed993e5e9423abac200dd59141edbada56Dave Airlie      switch (mod) {
1657f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_2X_BIT_ATI:
1667f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 2 * val[i];
1677f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1687f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_4X_BIT_ATI:
1697f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 4 * val[i];
1707f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1717f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_8X_BIT_ATI:
1727f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 val[i] = 8 * val[i];
1737f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1747f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_HALF_BIT_ATI:
175880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 val[i] = val[i] * 0.5F;
1767f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1777f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_QUARTER_BIT_ATI:
178880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 val[i] = val[i] * 0.25F;
1797f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1807f752fed993e5e9423abac200dd59141edbada56Dave Airlie      case GL_EIGHTH_BIT_ATI:
181880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 val[i] = val[i] * 0.125F;
1827f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 break;
1837f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1847f752fed993e5e9423abac200dd59141edbada56Dave Airlie
1857f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (has_sat) {
186880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 if (val[i] < 0.0F)
187880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	    val[i] = 0.0F;
188880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 else if (val[i] > 1.0F)
189880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	    val[i] = 1.0F;
1907f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1917f752fed993e5e9423abac200dd59141edbada56Dave Airlie      else {
192880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 if (val[i] < -8.0F)
193880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	    val[i] = -8.0F;
194880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	 else if (val[i] > 8.0F)
195880411c72aee7c0ec81366bdf6ab8cf25bebb9d5Brian Paul	    val[i] = 8.0F;
1967f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
1977f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
1987f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
1997f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2007f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2017f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
2027f752fed993e5e9423abac200dd59141edbada56Dave Airliewrite_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
2037f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLfloat * dst)
2047f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2057f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
2067f752fed993e5e9423abac200dd59141edbada56Dave Airlie   apply_dst_mod(optype, mod, src);
2077f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2087f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
2097f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (mask) {
2107f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_RED_BIT_ATI)
2117f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[0] = src[0];
2127f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2137f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_GREEN_BIT_ATI)
2147f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[1] = src[1];
2157f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2167f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 if (mask & GL_BLUE_BIT_ATI)
2177f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[2] = src[2];
2187f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2197f752fed993e5e9423abac200dd59141edbada56Dave Airlie      else {
2207f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (i = 0; i < 3; i++)
2217f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    dst[i] = src[i];
2227f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
2237f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2247f752fed993e5e9423abac200dd59141edbada56Dave Airlie   else
2257f752fed993e5e9423abac200dd59141edbada56Dave Airlie      dst[3] = src[3];
2267f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2277f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2286c58ddae17f63cd14b10d9022baee2ca6346fad2Brian Paulstatic void
2297f752fed993e5e9423abac200dd59141edbada56Dave Airliefinish_pass(struct atifs_machine *machine)
2307f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2317f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i;
2327f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2337f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < 6; i++) {
2347f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
2357f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2367f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2377f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2387f752fed993e5e9423abac200dd59141edbada56Dave Airliestruct ati_fs_opcode_st ati_fs_opcodes[] = {
2397f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_ADD_ATI, 2},
2407f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_SUB_ATI, 2},
2417f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_MUL_ATI, 2},
2427f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_MAD_ATI, 3},
2437f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_LERP_ATI, 3},
2447f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_MOV_ATI, 1},
2457f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_CND_ATI, 3},
2467f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_CND0_ATI, 3},
2477f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_DOT2_ADD_ATI, 3},
2487f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_DOT3_ATI, 2},
2497f752fed993e5e9423abac200dd59141edbada56Dave Airlie   {GL_DOT4_ATI, 2}
2507f752fed993e5e9423abac200dd59141edbada56Dave Airlie};
2517f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2527f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2537f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2547f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
255f519a770d074dac9e188e3b450c828510506c46dRoland Scheideggerhandle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
256cdb27e8242215271364602995d85607cfc06d441Brian Paul	       const SWspan *span, GLuint column, GLuint idx)
2577f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
258f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint swizzle = texinst->swizzle;
259f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint pass_tex = texinst->src;
260f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger
2617f752fed993e5e9423abac200dd59141edbada56Dave Airlie   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
2627f752fed993e5e9423abac200dd59141edbada56Dave Airlie      pass_tex -= GL_TEXTURE0_ARB;
2637f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->Registers[idx],
264f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian	      span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
2657f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2663c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
2677f752fed993e5e9423abac200dd59141edbada56Dave Airlie      pass_tex -= GL_REG_0_ATI;
2687f752fed993e5e9423abac200dd59141edbada56Dave Airlie      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
2697f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2703c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   apply_swizzle(machine->Registers[idx], swizzle);
2717f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2727f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2737f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2747f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
2757f752fed993e5e9423abac200dd59141edbada56Dave Airliehandle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
276cdb27e8242215271364602995d85607cfc06d441Brian Paul		 struct atifs_setupinst *texinst, const SWspan *span,
277f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		 GLuint column, GLuint idx)
2787f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
2793c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger/* sample from unit idx using texinst->src as coords */
280f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   GLuint swizzle = texinst->swizzle;
2813c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   GLuint coord_source = texinst->src;
2821e444c9960b18bcee5216a49db997b1c5ec14ecaVinson Lee   GLfloat tex_coords[4] = { 0 };
2837f752fed993e5e9423abac200dd59141edbada56Dave Airlie
2843c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
2853c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      coord_source -= GL_TEXTURE0_ARB;
286f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian      COPY_4V(tex_coords,
287f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian              span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
2887f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2893c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
2903c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      coord_source -= GL_REG_0_ATI;
2913c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
2927f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
2933c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   apply_swizzle(tex_coords, swizzle);
2943c450b2917ee2eef06197b9ef546f19d06b7d76aRoland Scheidegger   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
2957f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
2967f752fed993e5e9423abac200dd59141edbada56Dave Airlie
/**
 * Copy the 4-component vector \p x into slot \p i of the \p optype source
 * operand set.  Relies on an array named "src" being in scope where the
 * macro is expanded.
 */
#define SETUP_SRC_REG(optype, i, x)		\
do {						\
   COPY_4V(src[optype][i], x);			\
} while (0)
3017f752fed993e5e9423abac200dd59141edbada56Dave Airlie
302919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul
303919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul
304919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul/**
305919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * Execute the given fragment shader.
306919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * NOTE: we do everything in single-precision floating point
307919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param ctx - rendering context
308919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param shader - the shader to execute
309919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param machine - virtual machine state
310919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param span - the SWspan we're operating on
311919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul * \param column - which pixel [i] we're operating on in the span
312919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul */
313919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paulstatic void
314919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paulexecute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
315cdb27e8242215271364602995d85607cfc06d441Brian Paul	       struct atifs_machine *machine, const SWspan *span,
316919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul               GLuint column)
3177f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
3187f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLuint pc;
3197f752fed993e5e9423abac200dd59141edbada56Dave Airlie   struct atifs_instruction *inst;
320f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   struct atifs_setupinst *texinst;
3217f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint optype;
3227e85b0a025a82c3ffed060a757a3b4adae03d269Brian   GLuint i;
3237e85b0a025a82c3ffed060a757a3b4adae03d269Brian   GLint j, pass;
3247f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint dstreg;
3257f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat src[2][3][4];
3267f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
3277f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
3287f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLfloat dst[2][4], *dstp;
3297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
330f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger   for (pass = 0; pass < shader->NumPasses; pass++) {
331f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      if (pass > 0)
332f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 finish_pass(machine);
333f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
334f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 texinst = &shader->SetupInst[pass][j];
335f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
336f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	    handle_pass_op(machine, texinst, span, column, j);
337f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
338f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	    handle_sample_op(ctx, machine, texinst, span, column, j);
339f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      }
3407f752fed993e5e9423abac200dd59141edbada56Dave Airlie
341f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
342f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	 inst = &shader->Instructions[pass][pc];
3437f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3447f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* setup the source registers for color and alpha ops */
3457f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
3467e85b0a025a82c3ffed060a757a3b4adae03d269Brian 	    for (i = 0; i < inst->ArgCount[optype]; i++) {
3477f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       GLint index = inst->SrcReg[optype][i].Index;
3487f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3497f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
3507f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3517f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Registers[index - GL_REG_0_ATI]);
352f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
35363d683091fe3a9600b65ae7ef3b554168b805406Brian Paul		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
354f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		     SETUP_SRC_REG(optype, i,
3557f752fed993e5e9423abac200dd59141edbada56Dave Airlie				shader->Constants[index - GL_CON_0_ATI]);
356f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  } else {
357f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		     SETUP_SRC_REG(optype, i,
35863d683091fe3a9600b65ae7ef3b554168b805406Brian Paul				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
359f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger		  }
360f519a770d074dac9e188e3b450c828510506c46dRoland Scheidegger	       }
3617f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_ONE)
3627f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i, ones);
3637f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_ZERO)
3647f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i, zeros);
3657f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_PRIMARY_COLOR_EXT)
3667f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3677f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
3687f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
3697f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  SETUP_SRC_REG(optype, i,
3707f752fed993e5e9423abac200dd59141edbada56Dave Airlie				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
3717f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3727f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
3737f752fed993e5e9423abac200dd59141edbada56Dave Airlie			     src[optype][i]);
3747f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
3757f752fed993e5e9423abac200dd59141edbada56Dave Airlie			     src[optype][i]);
3767f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
3777f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
3787f752fed993e5e9423abac200dd59141edbada56Dave Airlie
3797f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* Execute the operations - color then alpha */
3807f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
3817f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    if (inst->Opcode[optype]) {
3827f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       switch (inst->Opcode[optype]) {
3837f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_ADD_ATI:
3847f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
3857f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
3867f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
3877f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] + src[optype][1][i];
3887f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
3897f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
3907f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
3917f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
3927f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_SUB_ATI:
3937f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
3947f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
3957f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
3967f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] - src[optype][1][i];
3977f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
3987f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
3997f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
4007f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4017f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MUL_ATI:
4027f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4037f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4047f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4057f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i];
4067f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4077f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4087f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
4097f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4107f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MAD_ATI:
4117f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4127f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4137f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4147f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i] +
4157f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][2][i];
4167f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4177f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4187f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4197f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][0][3] * src[optype][1][3] +
4207f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][2][3];
4217f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4227f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_LERP_ATI:
4237f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4247f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4257f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4267f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][0][i] * src[optype][1][i] + (1 -
4277f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    src
4287f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    [optype]
4297f752fed993e5e9423abac200dd59141edbada56Dave Airlie								    [0][i]) *
4307f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   src[optype][2][i];
4317f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4327f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4337f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4347f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][0][3] * src[optype][1][3] + (1 -
4357f752fed993e5e9423abac200dd59141edbada56Dave Airlie								 src[optype]
4367f752fed993e5e9423abac200dd59141edbada56Dave Airlie								 [0][3]) *
4377f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[optype][2][3];
4387f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4407f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_MOV_ATI:
4417f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4427f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4437f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] = src[optype][0][i];
4447f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4457f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else
4467f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] = src[optype][0][3];
4477f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4487f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_CND_ATI:
4497f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype) {
4507f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4517f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4527f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   (src[optype][2][i] >
4537f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    0.5) ? src[optype][0][i] : src[optype][1][i];
4547f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4557f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4567f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else {
4577f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4587f752fed993e5e9423abac200dd59141edbada56Dave Airlie			(src[optype][2][3] >
4597f752fed993e5e9423abac200dd59141edbada56Dave Airlie			 0.5) ? src[optype][0][3] : src[optype][1][3];
4607f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4617f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4627f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4637f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_CND0_ATI:
4647f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  if (!optype)
4657f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     for (i = 0; i < 3; i++) {
4667f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][i] =
4677f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   (src[optype][2][i] >=
4687f752fed993e5e9423abac200dd59141edbada56Dave Airlie			    0) ? src[optype][0][i] : src[optype][1][i];
4697f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4707f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  else {
4717f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     dst[optype][3] =
4727f752fed993e5e9423abac200dd59141edbada56Dave Airlie			(src[optype][2][3] >=
4737f752fed993e5e9423abac200dd59141edbada56Dave Airlie			 0) ? src[optype][0][3] : src[optype][1][3];
4747f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4757f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4767f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT2_ADD_ATI:
4777f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
4787f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
4797f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4807f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 2 always uses the source from the color op */
481b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		     /* could save recalculation of dot products for alpha inst */
4827f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     result = src[0][0][0] * src[0][1][0] +
4837f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] + src[0][2][2];
4847f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
4857f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
4867f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
4877f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
4887f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
4897f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
4907f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
4917f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
4927f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
4937f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT3_ATI:
4947f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
4957f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
4967f752fed993e5e9423abac200dd59141edbada56Dave Airlie
4977f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 3 always uses the source from the color op */
4987f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     result = src[0][0][0] * src[0][1][0] +
4997f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] +
5007f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][2] * src[0][1][2];
5017f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5027f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
5037f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
5047f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
5057f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
5067f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
5077f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
5087f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
5097f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
5107f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
5117f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       case GL_DOT4_ATI:
5127f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  {
5137f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     GLfloat result;
5147f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5157f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     /* DOT 4 always uses the source from the color op */
516b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		     result = src[0][0][0] * src[0][1][0] +
5177f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][1] * src[0][1][1] +
5187f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][2] * src[0][1][2] +
5197f752fed993e5e9423abac200dd59141edbada56Dave Airlie			src[0][0][3] * src[0][1][3];
5207f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     if (!optype) {
5217f752fed993e5e9423abac200dd59141edbada56Dave Airlie			for (i = 0; i < 3; i++) {
5227f752fed993e5e9423abac200dd59141edbada56Dave Airlie			   dst[optype][i] = result;
5237f752fed993e5e9423abac200dd59141edbada56Dave Airlie			}
5247f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     }
5257f752fed993e5e9423abac200dd59141edbada56Dave Airlie		     else
5267f752fed993e5e9423abac200dd59141edbada56Dave Airlie			dst[optype][3] = result;
5277f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  }
5287f752fed993e5e9423abac200dd59141edbada56Dave Airlie		  break;
5297f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5307f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       }
5317f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
5327f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
5337f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5347f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 /* write out the destination registers */
5357f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 for (optype = 0; optype < 2; optype++) {
5367f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    if (inst->Opcode[optype]) {
5377f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       dstreg = inst->DstReg[optype].Index;
5387f752fed993e5e9423abac200dd59141edbada56Dave Airlie	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
5397f752fed993e5e9423abac200dd59141edbada56Dave Airlie
540b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
541b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
542b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
5437f752fed993e5e9423abac200dd59141edbada56Dave Airlie			      inst->DstReg[optype].dstMask, dst[optype],
5447f752fed993e5e9423abac200dd59141edbada56Dave Airlie			      dstp);
545b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger	       else
546b346213960057a25edf404e3323fc9da4dc54c0eRoland Scheidegger		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
5477f752fed993e5e9423abac200dd59141edbada56Dave Airlie	    }
5487f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
5497f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
5507f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
5517f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
5527f752fed993e5e9423abac200dd59141edbada56Dave Airlie
553c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul
554c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul/**
555c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul * Init fragment shader virtual machine state.
556c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul */
5577f752fed993e5e9423abac200dd59141edbada56Dave Airliestatic void
5587f752fed993e5e9423abac200dd59141edbada56Dave Airlieinit_machine(GLcontext * ctx, struct atifs_machine *machine,
5597f752fed993e5e9423abac200dd59141edbada56Dave Airlie	     const struct ati_fragment_shader *shader,
560cdb27e8242215271364602995d85607cfc06d441Brian Paul	     const SWspan *span, GLuint col)
5617f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
562c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   GLfloat (*inputs)[4] = machine->Inputs;
5637f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLint i, j;
5647f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5657f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < 6; i++) {
5667f752fed993e5e9423abac200dd59141edbada56Dave Airlie      for (j = 0; j < 4; j++)
567c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul	 machine->Registers[i][j] = 0.0;
5687f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
5697f752fed993e5e9423abac200dd59141edbada56Dave Airlie
570f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
571f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
5727f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
5737f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5747f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5757f752fed993e5e9423abac200dd59141edbada56Dave Airlie
5767f752fed993e5e9423abac200dd59141edbada56Dave Airlie/**
577c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul * Execute the current ATI shader program, operating on the given span.
5787f752fed993e5e9423abac200dd59141edbada56Dave Airlie */
5797f752fed993e5e9423abac200dd59141edbada56Dave Airlievoid
580cdb27e8242215271364602995d85607cfc06d441Brian Paul_swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
5817f752fed993e5e9423abac200dd59141edbada56Dave Airlie{
5827f752fed993e5e9423abac200dd59141edbada56Dave Airlie   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
583c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul   struct atifs_machine machine;
5847f752fed993e5e9423abac200dd59141edbada56Dave Airlie   GLuint i;
5857f752fed993e5e9423abac200dd59141edbada56Dave Airlie
586c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul   /* incoming colors should be floats */
587c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul   ASSERT(span->array->ChanType == GL_FLOAT);
588c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul
5897f752fed993e5e9423abac200dd59141edbada56Dave Airlie   for (i = 0; i < span->end; i++) {
5907f752fed993e5e9423abac200dd59141edbada56Dave Airlie      if (span->array->mask[i]) {
591c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul	 init_machine(ctx, &machine, shader, span, i);
592919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul
593919cd2c3ba39bf4d8d2ffcea0daec7bab8645d34Brian Paul	 execute_shader(ctx, shader, &machine, span, i);
5947f752fed993e5e9423abac200dd59141edbada56Dave Airlie
595c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul         /* store result color */
5967f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 {
597c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul	    const GLfloat *colOut = machine.Registers[0];
598c3caaa3dd45809e672177ab322445fe51d03af25Brian Paul            /*fprintf(stderr,"outputs %f %f %f %f\n",
599c2074645cd23e23ff86ed7f0a71845a3209f0beaBrian Paul              colOut[0], colOut[1], colOut[2], colOut[3]); */
600f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian            COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
6017f752fed993e5e9423abac200dd59141edbada56Dave Airlie	 }
6027f752fed993e5e9423abac200dd59141edbada56Dave Airlie      }
6037f752fed993e5e9423abac200dd59141edbada56Dave Airlie   }
6047f752fed993e5e9423abac200dd59141edbada56Dave Airlie}
605