s_fragprog.c revision 865f88afc0d59d886fb2ad50429e584ecf17fa81
1865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/*
2865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Mesa 3-D graphics library
3865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Version:  6.5.2
4865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
5865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
6865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
7865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Permission is hereby granted, free of charge, to any person obtaining a
8865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * copy of this software and associated documentation files (the "Software"),
9865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * to deal in the Software without restriction, including without limitation
10865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * and/or sell copies of the Software, and to permit persons to whom the
12865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Software is furnished to do so, subject to the following conditions:
13865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
14865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * The above copyright notice and this permission notice shall be included
15865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * in all copies or substantial portions of the Software.
16865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
17865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
24865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
25865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/*
26865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Regarding GL_NV_fragment_program:
27865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
28865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Portions of this software may use or implement intellectual
29865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * any and all warranties with respect to such intellectual property,
31865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * including any use thereof or modifications thereto.
32865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
33865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
34865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "glheader.h"
35865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "colormac.h"
36865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "context.h"
37865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "prog_instruction.h"
38865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "prog_parameter.h"
39865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "prog_print.h"
40865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "program.h"
41865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
42865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "s_nvfragprog.h"
43865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "s_span.h"
44865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
45865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
46865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/* See comments below for info about this */
47865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#define LAMBDA_ZERO 1
48865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
49865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/* debug predicate */
50865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#define DEBUG_FRAG 0
51865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
52865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
53865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
54865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Virtual machine state used during execution of a fragment programs.
55865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
56865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstruct fp_machine
57865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
58865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat Temporaries[MAX_PROGRAM_TEMPS][4];
59865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat Inputs[FRAG_ATTRIB_MAX][4];
60865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat Outputs[FRAG_RESULT_MAX][4];
61865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint CondCodes[4];  /**< COND_* value for x/y/z/w */
62865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
63865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */
64865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint StackDepth; /**< Index/ptr to top of CallStack[] */
65865f88afc0d59d886fb2ad50429e584ecf17fa81Brian};
66865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
67865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
68865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#if FEATURE_MESA_program_debug
69865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic struct fp_machine *CurrentMachine = NULL;
70865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
71865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
72865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * For GL_MESA_program_debug.
73865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Return current value (4*GLfloat) of a fragment program register.
74865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Called via ctx->Driver.GetFragmentProgramRegister().
75865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
76865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvoid
77865f88afc0d59d886fb2ad50429e584ecf17fa81Brian_swrast_get_program_register(GLcontext *ctx, enum register_file file,
78865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                             GLuint index, GLfloat val[4])
79865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
80865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (CurrentMachine) {
81865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      switch (file) {
82865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_INPUT:
83865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(val, CurrentMachine->Inputs[index]);
84865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
85865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_OUTPUT:
86865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(val, CurrentMachine->Outputs[index]);
87865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
88865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_TEMPORARY:
89865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(val, CurrentMachine->Temporaries[index]);
90865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
91865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      default:
92865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         _mesa_problem(NULL,
93865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                       "bad register file in _swrast_get_program_register");
94865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
95865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
96865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
97865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#endif /* FEATURE_MESA_program_debug */
98865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
99865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
100865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
101865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch a texel.
102865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
103865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
104865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
105865f88afc0d59d886fb2ad50429e584ecf17fa81Brian             GLuint unit, GLfloat color[4] )
106865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
107865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLchan rgba[4];
108865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   SWcontext *swrast = SWRAST_CONTEXT(ctx);
109865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
110865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* XXX use a float-valued TextureSample routine here!!! */
111865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
112865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               1, (const GLfloat (*)[4]) texcoord,
113865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               &lambda, &rgba);
114865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[0] = CHAN_TO_FLOAT(rgba[0]);
115865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[1] = CHAN_TO_FLOAT(rgba[1]);
116865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[2] = CHAN_TO_FLOAT(rgba[2]);
117865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[3] = CHAN_TO_FLOAT(rgba[3]);
118865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
119865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
120865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
121865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
122865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch a texel with the given partial derivatives to compute a level
123865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * of detail in the mipmap.
124865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
125865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
126865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
127865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   const GLfloat texdx[4], const GLfloat texdy[4],
128865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   GLuint unit, GLfloat color[4] )
129865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
130865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   SWcontext *swrast = SWRAST_CONTEXT(ctx);
131865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
132865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
133865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat texW = (GLfloat) texImg->WidthScale;
134865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat texH = (GLfloat) texImg->HeightScale;
135865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLchan rgba[4];
136865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
137865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
138865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texdx[1], texdy[1], /* dt/dx, dt/dy */
139865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texdx[3], texdy[2], /* dq/dx, dq/dy */
140865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texW, texH,
141865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texcoord[0], texcoord[1], texcoord[3],
142865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         1.0F / texcoord[3]);
143865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
144865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
145865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               1, (const GLfloat (*)[4]) texcoord,
146865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               &lambda, &rgba);
147865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[0] = CHAN_TO_FLOAT(rgba[0]);
148865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[1] = CHAN_TO_FLOAT(rgba[1]);
149865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[2] = CHAN_TO_FLOAT(rgba[2]);
150865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[3] = CHAN_TO_FLOAT(rgba[3]);
151865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
152865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
153865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
154865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
155865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Return a pointer to the 4-element float vector specified by the given
156865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * source register.
157865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
158865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic INLINE const GLfloat *
159865f88afc0d59d886fb2ad50429e584ecf17fa81Brianget_register_pointer( GLcontext *ctx,
160865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      const struct prog_src_register *source,
161865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      const struct fp_machine *machine,
162865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      const struct gl_fragment_program *program )
163865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
164865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (source->File) {
165865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_TEMPORARY:
166865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < MAX_PROGRAM_TEMPS);
167865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return machine->Temporaries[source->Index];
168865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_INPUT:
169865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < FRAG_ATTRIB_MAX);
170865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return machine->Inputs[source->Index];
171865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_OUTPUT:
172865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* This is only for PRINT */
173865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < FRAG_RESULT_MAX);
174865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return machine->Outputs[source->Index];
175865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_LOCAL_PARAM:
176865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
177865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return program->Base.LocalParams[source->Index];
178865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_ENV_PARAM:
179865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
180865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return ctx->FragmentProgram.Parameters[source->Index];
181865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_STATE_VAR:
182865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Fallthrough */
183865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_CONSTANT:
184865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Fallthrough */
185865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_UNIFORM:
186865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Fallthrough */
187865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_NAMED_PARAM:
188865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
189865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return program->Base.Parameters->ParameterValues[source->Index];
190865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   default:
191865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      _mesa_problem(ctx, "Invalid input register file %d in fp "
192865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    "get_register_pointer", source->File);
193865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return NULL;
194865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
195865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
196865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
197865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
198865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
199865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch a 4-element float vector from the given source register.
200865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Apply swizzling and negating as needed.
201865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
202865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
203865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_vector4( GLcontext *ctx,
204865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct prog_src_register *source,
205865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct fp_machine *machine,
206865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct gl_fragment_program *program,
207865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4] )
208865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
209865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat *src = get_register_pointer(ctx, source, machine, program);
210865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(src);
211865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
212865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
213865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                        SWIZZLE_Z, SWIZZLE_W)) {
214865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* no swizzling */
215865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      COPY_4V(result, src);
216865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
217865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   else {
218865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = src[GET_SWZ(source->Swizzle, 0)];
219865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = src[GET_SWZ(source->Swizzle, 1)];
220865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = src[GET_SWZ(source->Swizzle, 2)];
221865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = src[GET_SWZ(source->Swizzle, 3)];
222865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
223865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
224865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateBase) {
225865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
226865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
227865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
228865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
229865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
230865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Abs) {
231865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = FABSF(result[0]);
232865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = FABSF(result[1]);
233865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = FABSF(result[2]);
234865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = FABSF(result[3]);
235865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
236865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateAbs) {
237865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
238865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
239865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
240865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
241865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
242865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
243865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
244865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
245865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
246865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch the derivative with respect to X for the given register.
247865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \return GL_TRUE if it was easily computed or GL_FALSE if we
248865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * need to execute another instance of the program (ugh)!
249865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
250865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic GLboolean
251865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_vector4_deriv( GLcontext *ctx,
252865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     const struct prog_src_register *source,
253865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     const SWspan *span,
254865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     char xOrY, GLint column, GLfloat result[4] )
255865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
256865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat src[4];
257865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
258865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(xOrY == 'X' || xOrY == 'Y');
259865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
260865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (source->Index) {
261865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_WPOS:
262865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
263865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = 1.0;
264865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 0.0;
265865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
266865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dwdx;
267865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
268865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
269865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = 0.0;
270865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 1.0;
271865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
272865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dwdy;
273865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
274865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
275865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_COL0:
276865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
277865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->drdx * (1.0F / CHAN_MAXF);
278865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dgdx * (1.0F / CHAN_MAXF);
279865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dbdx * (1.0F / CHAN_MAXF);
280865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dadx * (1.0F / CHAN_MAXF);
281865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
282865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
283865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->drdy * (1.0F / CHAN_MAXF);
284865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dgdy * (1.0F / CHAN_MAXF);
285865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dbdy * (1.0F / CHAN_MAXF);
286865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dady * (1.0F / CHAN_MAXF);
287865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
288865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
289865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_COL1:
290865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
291865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
292865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
293865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
294865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0; /* XXX need this */
295865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
296865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
297865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
298865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
299865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
300865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0; /* XXX need this */
301865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
302865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
303865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_FOGC:
304865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
305865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dfogdx;
306865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 0.0;
307865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = 0.0;
308865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0;
309865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
310865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
311865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dfogdy;
312865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 0.0;
313865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = 0.0;
314865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0;
315865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
316865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
317865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX0:
318865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX1:
319865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX2:
320865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX3:
321865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX4:
322865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX5:
323865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX6:
324865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX7:
325865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
326865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
327865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* this is a little tricky - I think I've got it right */
328865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLfloat invQ = 1.0f / (span->tex[u][3]
329865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                      + span->texStepX[u][3] * column);
330865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->texStepX[u][0] * invQ;
331865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->texStepX[u][1] * invQ;
332865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->texStepX[u][2] * invQ;
333865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->texStepX[u][3] * invQ;
334865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
335865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
336865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
337865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* Tricky, as above, but in Y direction */
338865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
339865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->texStepY[u][0] * invQ;
340865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->texStepY[u][1] * invQ;
341865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->texStepY[u][2] * invQ;
342865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->texStepY[u][3] * invQ;
343865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
344865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
345865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   default:
346865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return GL_FALSE;
347865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
348865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
349865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[0] = src[GET_SWZ(source->Swizzle, 0)];
350865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[1] = src[GET_SWZ(source->Swizzle, 1)];
351865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[2] = src[GET_SWZ(source->Swizzle, 2)];
352865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[3] = src[GET_SWZ(source->Swizzle, 3)];
353865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
354865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateBase) {
355865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
356865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
357865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
358865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
359865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
360865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Abs) {
361865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = FABSF(result[0]);
362865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = FABSF(result[1]);
363865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = FABSF(result[2]);
364865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = FABSF(result[3]);
365865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
366865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateAbs) {
367865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
368865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
369865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
370865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
371865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
372865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return GL_TRUE;
373865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
374865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
375865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
376865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
377865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * As above, but only return result[0] element.
378865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
379865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
380865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_vector1( GLcontext *ctx,
381865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct prog_src_register *source,
382865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct fp_machine *machine,
383865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct gl_fragment_program *program,
384865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4] )
385865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
386865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat *src = get_register_pointer(ctx, source, machine, program);
387865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(src);
388865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
389865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[0] = src[GET_SWZ(source->Swizzle, 0)];
390865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
391865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateBase) {
392865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
393865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
394865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Abs) {
395865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = FABSF(result[0]);
396865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
397865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateAbs) {
398865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
399865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
400865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
401865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
402865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
403865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
404865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Test value against zero and return GT, LT, EQ or UN if NaN.
405865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
406865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic INLINE GLuint
407865f88afc0d59d886fb2ad50429e584ecf17fa81Briangenerate_cc( float value )
408865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
409865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value != value)
410865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return COND_UN;  /* NaN */
411865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value > 0.0F)
412865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return COND_GT;
413865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value < 0.0F)
414865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return COND_LT;
415865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return COND_EQ;
416865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
417865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
418865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
419865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
420865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Test if the ccMaskRule is satisfied by the given condition code.
421865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Used to mask destination writes according to the current condition code.
422865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
423865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic INLINE GLboolean
424865f88afc0d59d886fb2ad50429e584ecf17fa81Briantest_cc(GLuint condCode, GLuint ccMaskRule)
425865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
426865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (ccMaskRule) {
427865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_EQ: return (condCode == COND_EQ);
428865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_NE: return (condCode != COND_EQ);
429865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_LT: return (condCode == COND_LT);
430865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
431865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
432865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_GT: return (condCode == COND_GT);
433865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_TR: return GL_TRUE;
434865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_FL: return GL_FALSE;
435865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   default:      return GL_TRUE;
436865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
437865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
438865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
439865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
440865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
441865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Store 4 floats into a register.  Observe the instructions saturate and
442865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * set-condition-code flags.
443865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
444865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
445865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstore_vector4( const struct prog_instruction *inst,
446865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               struct fp_machine *machine,
447865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat value[4] )
448865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
449865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct prog_dst_register *dest = &(inst->DstReg);
450865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
451865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat *dstReg;
452865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat dummyReg[4];
453865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat clampedValue[4];
454865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint writeMask = dest->WriteMask;
455865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
456865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (dest->File) {
457865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_OUTPUT:
458865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         dstReg = machine->Outputs[dest->Index];
459865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
460865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_TEMPORARY:
461865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         dstReg = machine->Temporaries[dest->Index];
462865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
463865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_WRITE_ONLY:
464865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         dstReg = dummyReg;
465865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         return;
466865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      default:
467865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         _mesa_problem(NULL, "bad register file in store_vector4(fp)");
468865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         return;
469865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
470865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
471865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#if 0
472865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value[0] > 1.0e10 ||
473865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[0]) ||
474865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[1]) ||
475865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[2]) ||
476865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[3])  )
477865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
478865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#endif
479865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
480865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (clamp) {
481865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
482865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
483865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
484865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
485865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      value = clampedValue;
486865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
487865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
488865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (dest->CondMask != COND_TR) {
489865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* condition codes may turn off some writes */
490865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_X) {
491865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
492865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
493865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_X;
494865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
495865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Y) {
496865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
497865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
498865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_Y;
499865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
500865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Z) {
501865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
502865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
503865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_Z;
504865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
505865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_W) {
506865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
507865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
508865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_W;
509865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
510865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
511865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
512865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_X)
513865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[0] = value[0];
514865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_Y)
515865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[1] = value[1];
516865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_Z)
517865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[2] = value[2];
518865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_W)
519865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[3] = value[3];
520865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
521865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inst->CondUpdate) {
522865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_X)
523865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[0] = generate_cc(value[0]);
524865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Y)
525865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[1] = generate_cc(value[1]);
526865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Z)
527865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[2] = generate_cc(value[2]);
528865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_W)
529865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[3] = generate_cc(value[3]);
530865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
531865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
532865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
533865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
534865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
535865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Initialize a new machine state instance from an existing one, adding
536865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * the partial derivatives onto the input registers.
537865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Used to implement DDX and DDY instructions in non-trivial cases.
538865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
539865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
540865f88afc0d59d886fb2ad50429e584ecf17fa81Brianinit_machine_deriv( GLcontext *ctx,
541865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    const struct fp_machine *machine,
542865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    const struct gl_fragment_program *program,
543865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    const SWspan *span, char xOrY,
544865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    struct fp_machine *dMachine )
545865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
546865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint u, v;
547865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
548865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(xOrY == 'X' || xOrY == 'Y');
549865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
550865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* copy existing machine */
551865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
552865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
553865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
554865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Clear temporary registers (undefined for ARB_f_p) */
555865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      _mesa_bzero( (void*) machine->Temporaries,
556865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
557865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
558865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
559865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* Add derivatives */
560865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
561865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
562865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
563865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] += 1.0F;
564865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] += 0.0F;
565865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[2] += span->dzdx;
566865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[3] += span->dwdx;
567865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
568865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
569865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] += 0.0F;
570865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] += 1.0F;
571865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[2] += span->dzdy;
572865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[3] += span->dwdy;
573865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
574865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
575865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
576865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
577865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
578865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[0] += span->drdx * (1.0F / CHAN_MAXF);
579865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
580865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
581865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[3] += span->dadx * (1.0F / CHAN_MAXF);
582865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
583865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
584865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[0] += span->drdy * (1.0F / CHAN_MAXF);
585865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
586865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
587865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[3] += span->dady * (1.0F / CHAN_MAXF);
588865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
589865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
590865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
591865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
592865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
593865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
594865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
595865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
596865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[3] += 0.0; /*XXX fix */
597865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
598865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
599865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
600865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
601865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
602865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[3] += 0.0; /*XXX fix */
603865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
604865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
605865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
606865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
607865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
608865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         fogc[0] += span->dfogdx;
609865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
610865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
611865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         fogc[0] += span->dfogdy;
612865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
613865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
614865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
615865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
616865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
617865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* XXX perspective-correct interpolation */
618865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (xOrY == 'X') {
619865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[0] += span->texStepX[u][0];
620865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[1] += span->texStepX[u][1];
621865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[2] += span->texStepX[u][2];
622865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[3] += span->texStepX[u][3];
623865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
624865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         else {
625865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[0] += span->texStepY[u][0];
626865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[1] += span->texStepY[u][1];
627865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[2] += span->texStepY[u][2];
628865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[3] += span->texStepY[u][3];
629865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
630865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
631865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
632865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
633865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (v = 0; v < ctx->Const.MaxVarying; v++) {
634865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_VAR0 + v))) {
635865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         GLfloat *var = (GLfloat*) machine->Inputs[FRAG_ATTRIB_VAR0 + v];
636865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* XXXX finish this */
637865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[0] += span->varStepX[v][0];
638865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[1] += span->varStepX[v][1];
639865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[2] += span->varStepX[v][2];
640865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[3] += span->varStepX[v][3];
641865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
642865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
643865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
644865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* init condition codes */
645865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[0] = COND_EQ;
646865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[1] = COND_EQ;
647865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[2] = COND_EQ;
648865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[3] = COND_EQ;
649865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
650865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
651865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
/**
 * Execute the given fragment program.
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param program - the fragment program to execute
 * \param machine - machine state (register file)
 * \param maxInst - max number of instructions to execute
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
662865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic GLboolean
663865f88afc0d59d886fb2ad50429e584ecf17fa81Brianexecute_program( GLcontext *ctx,
664865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 const struct gl_fragment_program *program, GLuint maxInst,
665865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 struct fp_machine *machine, const SWspan *span,
666865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 GLuint column )
667865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
668865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint pc;
669865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
670865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (DEBUG_FRAG) {
671865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      printf("execute fragment program --------------------\n");
672865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
673865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
674865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (pc = 0; pc < maxInst; pc++) {
675865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      const struct prog_instruction *inst = program->Base.Instructions + pc;
676865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
677865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (ctx->FragmentProgram.CallbackEnabled &&
678865f88afc0d59d886fb2ad50429e584ecf17fa81Brian          ctx->FragmentProgram.Callback) {
679865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         ctx->FragmentProgram.CurrentPosition = inst->StringPos;
680865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         ctx->FragmentProgram.Callback(program->Base.Target,
681865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                       ctx->FragmentProgram.CallbackData);
682865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
683865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
684865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (DEBUG_FRAG) {
685865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         _mesa_print_instruction(inst);
686865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
687865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
688865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      switch (inst->Opcode) {
689865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_ABS:
690865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
691865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
692865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
693865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = FABSF(a[0]);
694865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = FABSF(a[1]);
695865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = FABSF(a[2]);
696865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = FABSF(a[3]);
697865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
698865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
699865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
700865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_ADD:
701865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
702865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
703865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
704865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
705865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] + b[0];
706865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] + b[1];
707865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] + b[2];
708865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] + b[3];
709865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
710865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
711865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
712865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
713865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
714865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
715865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
716865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
717865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
718865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_BRA: /* conditional branch */
719865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
720865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* NOTE: The return is conditional! */
721865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
722865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
723865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
724865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
725865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
726865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
727865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  /* take branch */
728865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  pc = inst->BranchTarget;
729865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
730865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
731865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
732865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_CAL: /* Call subroutine */
733865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
734865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* NOTE: The call is conditional! */
735865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
736865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
737865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
738865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
739865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
740865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
741865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
742865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
743865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  }
744865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  machine->CallStack[machine->StackDepth++] = pc + 1;
745865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  pc = inst->BranchTarget;
746865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
747865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
748865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
749865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_CMP:
750865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
751865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
752865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
753865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
754865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
755865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] < 0.0F ? b[0] : c[0];
756865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] < 0.0F ? b[1] : c[1];
757865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] < 0.0F ? b[2] : c[2];
758865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] < 0.0F ? b[3] : c[3];
759865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
760865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
761865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
762865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_COS:
763865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
764865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
765865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
766865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3]
767865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = (GLfloat) _mesa_cos(a[0]);
768865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
769865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
770865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
771865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DDX: /* Partial derivative with respect to X */
772865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
773865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], aNext[4], result[4];
774865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               struct fp_machine dMachine;
775865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
776865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                        column, result)) {
777865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  /* This is tricky.  Make a copy of the current machine state,
778865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * increment the input registers by the dx or dy partial
779865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * derivatives, then re-execute the program up to the
780865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * preceeding instruction, then fetch the source register.
781865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * Finally, find the difference in the register values for
782865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * the original and derivative runs.
783865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   */
784865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
785865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  init_machine_deriv(ctx, machine, program, span,
786865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                     'X', &dMachine);
787865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  execute_program(ctx, program, pc, &dMachine, span, column);
788865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
789865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[0] = aNext[0] - a[0];
790865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[1] = aNext[1] - a[1];
791865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[2] = aNext[2] - a[2];
792865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[3] = aNext[3] - a[3];
793865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
794865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
795865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
796865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
797865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DDY: /* Partial derivative with respect to Y */
798865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
799865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], aNext[4], result[4];
800865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               struct fp_machine dMachine;
801865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
802865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                        column, result)) {
803865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  init_machine_deriv(ctx, machine, program, span,
804865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                     'Y', &dMachine);
805865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
806865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  execute_program(ctx, program, pc, &dMachine, span, column);
807865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
808865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[0] = aNext[0] - a[0];
809865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[1] = aNext[1] - a[1];
810865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[2] = aNext[2] - a[2];
811865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[3] = aNext[3] - a[3];
812865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
813865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
814865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
815865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
816865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DP3:
817865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
818865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
819865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
820865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
821865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
822865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
823865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
824865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
825865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
826865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
827865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
828865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
829865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DP4:
830865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
831865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
832865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
833865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
834865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
835865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
836865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
837865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
838865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], a[0], a[1], a[2], a[3],
839865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
840865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
841865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
842865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
843865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DPH:
844865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
845865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
846865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
847865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
848865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] =
849865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
850865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
851865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
852865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
853865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DST: /* Distance vector */
854865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
855865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
856865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
857865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
858865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = 1.0F;
859865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1];
860865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2];
861865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = b[3];
862865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
863865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
864865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
865865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_EX2: /* Exponential base 2 */
866865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
867865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
868865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
869865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] =
870865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  (GLfloat) _mesa_pow(2.0, a[0]);
871865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
872865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
873865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
874865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_FLR:
875865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
876865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
877865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
878865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = FLOORF(a[0]);
879865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = FLOORF(a[1]);
880865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = FLOORF(a[2]);
881865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = FLOORF(a[3]);
882865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
883865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
884865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
885865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_FRC:
886865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
887865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
888865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
889865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] - FLOORF(a[0]);
890865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] - FLOORF(a[1]);
891865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] - FLOORF(a[2]);
892865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] - FLOORF(a[3]);
893865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
894865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
895865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
896865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_KIL_NV: /* NV_f_p only */
897865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
898865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
899865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
900865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
901865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
902865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
903865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
904865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  return GL_FALSE;
905865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
906865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
907865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
908865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_KIL: /* ARB_f_p only */
909865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
910865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4];
911865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
912865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
913865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  return GL_FALSE;
914865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
915865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
916865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
917865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_LG2:  /* log base 2 */
918865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
919865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
920865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
921865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
922865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
923865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
924865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
925865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_LIT:
926865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
927865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
928865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
929865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
930865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = MAX2(a[0], 0.0F);
931865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = MAX2(a[1], 0.0F);
932865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* XXX ARB version clamps a[3], NV version doesn't */
933865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
934865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = 1.0F;
935865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[0];
936865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* XXX we could probably just use pow() here */
937865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (a[0] > 0.0F) {
938865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (a[1] == 0.0 && a[3] == 0.0)
939865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[2] = 1.0;
940865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else
941865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[2] = EXPF(a[3] * LOGF(a[1]));
942865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
943865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else {
944865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[2] = 0.0;
945865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
946865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = 1.0F;
947865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
948865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
949865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
950865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
951865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3]);
952865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
953865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
954865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
955865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_LRP:
956865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
957865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
958865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
959865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
960865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
961865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
962865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
963865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
964865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
965865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
966865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
967865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("LRP (%g %g %g %g) = (%g %g %g %g), "
968865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         "(%g %g %g %g), (%g %g %g %g)\n",
969865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
970865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
971865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3],
972865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         c[0], c[1], c[2], c[3]);
973865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
974865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
975865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
976865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MAD:
977865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
978865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
979865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
980865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
981865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
982865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] * b[0] + c[0];
983865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1] + c[1];
984865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] * b[2] + c[2];
985865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] * b[3] + c[3];
986865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
987865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
988865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
989865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         "(%g %g %g %g) + (%g %g %g %g)\n",
990865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
991865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
992865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3],
993865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         c[0], c[1], c[2], c[3]);
994865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
995865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
996865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
997865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MAX:
998865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
999865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1000865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1001865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1002865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = MAX2(a[0], b[0]);
1003865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = MAX2(a[1], b[1]);
1004865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = MAX2(a[2], b[2]);
1005865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = MAX2(a[3], b[3]);
1006865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1007865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1008865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1009865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
1010865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
1011865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
1012865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1013865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1014865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1015865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MIN:
1016865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1017865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1018865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1019865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1020865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = MIN2(a[0], b[0]);
1021865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = MIN2(a[1], b[1]);
1022865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = MIN2(a[2], b[2]);
1023865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = MIN2(a[3], b[3]);
1024865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1025865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1026865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1027865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MOV:
1028865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1029865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4];
1030865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
1031865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1032865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1033865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MOV (%g %g %g %g)\n",
1034865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3]);
1035865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1036865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1037865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1038865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MUL:
1039865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1040865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1041865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1042865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1043865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] * b[0];
1044865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1];
1045865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] * b[2];
1046865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] * b[3];
1047865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1048865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1049865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1050865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
1051865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
1052865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
1053865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1054865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1055865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1056865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1057865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1058865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1059865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLhalfNV hx, hy;
1060865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint *rawResult = (GLuint *) result;
1061865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint twoHalves;
1062865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1063865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hx = _mesa_float_to_half(a[0]);
1064865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hy = _mesa_float_to_half(a[1]);
1065865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               twoHalves = hx | (hy << 16);
1066865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1067865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = twoHalves;
1068865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1069865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1070865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1071865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1072865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1073865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1074865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint usx, usy, *rawResult = (GLuint *) result;
1075865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1076865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = CLAMP(a[0], 0.0F, 1.0F);
1077865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = CLAMP(a[1], 0.0F, 1.0F);
1078865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usx = IROUND(a[0] * 65535.0F);
1079865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usy = IROUND(a[1] * 65535.0F);
1080865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1081865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = usx | (usy << 16);
1082865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1083865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1084865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1085865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1086865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1087865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1088865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1089865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1090865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1091865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1092865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1093865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1094865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubx = IROUND(127.0F * a[0] + 128.0F);
1095865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               uby = IROUND(127.0F * a[1] + 128.0F);
1096865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubz = IROUND(127.0F * a[2] + 128.0F);
1097865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubw = IROUND(127.0F * a[3] + 128.0F);
1098865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1099865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1100865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1101865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1102865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1103865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1104865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1105865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1106865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1107865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1108865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = CLAMP(a[0], 0.0F, 1.0F);
1109865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = CLAMP(a[1], 0.0F, 1.0F);
1110865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[2] = CLAMP(a[2], 0.0F, 1.0F);
1111865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[3] = CLAMP(a[3], 0.0F, 1.0F);
1112865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubx = IROUND(255.0F * a[0]);
1113865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               uby = IROUND(255.0F * a[1]);
1114865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubz = IROUND(255.0F * a[2]);
1115865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubw = IROUND(255.0F * a[3]);
1116865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1117865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1118865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1119865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1120865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1121865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_POW:
1122865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1123865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1124865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1125865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1126865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3]
1127865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = (GLfloat)_mesa_pow(a[0], b[0]);
1128865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1129865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1130865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1131865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RCP:
1132865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1133865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1134865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1135865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1136865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (a[0] == 0)
1137865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     printf("RCP(0)\n");
1138865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else if (IS_INF_OR_NAN(a[0]))
1139865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     printf("RCP(inf)\n");
1140865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1141865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1142865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1143865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1144865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1145865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RET: /* return from subroutine */
1146865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1147865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* NOTE: The return is conditional! */
1148865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
1149865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
1150865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
1151865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
1152865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
1153865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
1154865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (machine->StackDepth == 0) {
1155865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1156865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  }
1157865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  pc = machine->CallStack[--machine->StackDepth];
1158865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1159865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1160865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1161865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RFL: /* reflection vector */
1162865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1163865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1164865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1165865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1166865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               tmpW = DOT3(axis, axis);
1167865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1168865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = tmpX * axis[0] - dir[0];
1169865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = tmpX * axis[1] - dir[1];
1170865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = tmpX * axis[2] - dir[2];
1171865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* result[3] is never written! XXX enforce in parser! */
1172865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1173865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1174865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1175865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RSQ: /* 1 / sqrt() */
1176865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1177865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1178865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1179865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = FABSF(a[0]);
1180865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1181865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1182865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1183865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1184865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1185865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1186865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1187865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SCS: /* sine and cos */
1188865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1189865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1190865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1191865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (GLfloat)_mesa_cos(a[0]);
1192865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (GLfloat)_mesa_sin(a[0]);
1193865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = 0.0;  /* undefined! */
1194865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = 0.0;  /* undefined! */
1195865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1196865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1197865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1198865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SEQ: /* set on equal */
1199865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1200865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1201865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1202865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1203865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1204865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1205865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1206865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1207865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1208865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1209865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1210865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SFL: /* set false, operands ignored */
1211865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1212865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1213865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1214865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1215865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1216865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SGE: /* set on greater or equal */
1217865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1218865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1219865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1220865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1221865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1222865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1223865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1224865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1225865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1226865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1227865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1228865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SGT: /* set on greater */
1229865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1230865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1231865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1232865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1233865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1234865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1235865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1236865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1237865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1238865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1239865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1240865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SIN:
1241865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1242865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1243865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1244865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3]
1245865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = (GLfloat) _mesa_sin(a[0]);
1246865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1247865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1248865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1249865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SLE: /* set on less or equal */
1250865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1251865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1252865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1253865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1254865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1255865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1256865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1257865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1258865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1259865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1260865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1261865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SLT: /* set on less */
1262865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1263865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1264865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1265865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1266865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1267865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1268865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1269865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1270865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1271865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1272865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1273865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SNE: /* set on not equal */
1274865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1275865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1276865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1277865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1278865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1279865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1280865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1281865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1282865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1283865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1284865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1285865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_STR: /* set true, operands ignored */
1286865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1287865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1288865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1289865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1290865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1291865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SUB:
1292865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1293865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1294865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1295865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1296865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] - b[0];
1297865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] - b[1];
1298865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] - b[2];
1299865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] - b[3];
1300865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1301865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1302865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1303865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
1304865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1305865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1306865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1307865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1308865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SWZ: /* extended swizzle */
1309865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1310865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct prog_src_register *source = &inst->SrcReg[0];
1311865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat *src = get_register_pointer(ctx, source,
1312865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                                         machine, program);
1313865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4];
1314865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint i;
1315865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               for (i = 0; i < 4; i++) {
1316865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  const GLuint swz = GET_SWZ(source->Swizzle, i);
1317865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (swz == SWIZZLE_ZERO)
1318865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = 0.0;
1319865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else if (swz == SWIZZLE_ONE)
1320865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = 1.0;
1321865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else {
1322865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     ASSERT(swz >= 0);
1323865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     ASSERT(swz <= 3);
1324865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = src[swz];
1325865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  }
1326865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (source->NegateBase & (1 << i))
1327865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = -result[i];
1328865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1329865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1330865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1331865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1332865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TEX: /* Both ARB and NV frag prog */
1333865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texel lookup */
1334865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1335865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* Note: only use the precomputed lambda value when we're
1336865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * sampling texture unit [K] with texcoord[K].
1337865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * Otherwise, the lambda value may have no relation to the
1338865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * instruction's texcoord or texture image.  Using the wrong
1339865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * lambda is usually bad news.
1340865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * The rest of the time, just use zero (until we get a more
1341865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * sophisticated way of computing lambda).
1342865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                */
1343865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat coord[4], color[4], lambda;
1344865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1345865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1346865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1347865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1348865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1349865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1350865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel( ctx, coord, lambda, inst->TexSrcUnit, color );
1351865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1352865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1353865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         "lod %f\n",
1354865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         color[0], color[1], color[2], color[3],
1355865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         inst->TexSrcUnit,
1356865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         coord[0], coord[1], coord[2], coord[3], lambda);
1357865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1358865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1359865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1360865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1361865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXB: /* GL_ARB_fragment_program only */
1362865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texel lookup with LOD bias */
1363865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1364865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat coord[4], color[4], lambda, bias;
1365865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1366865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1367865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1368865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1369865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1370865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1371865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* coord[3] is the bias to add to lambda */
1372865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1373865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1374865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    + coord[3];
1375865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel(ctx, coord, lambda + bias, inst->TexSrcUnit, color);
1376865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1377865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1378865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1379865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXD: /* GL_NV_fragment_program only */
1380865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texture lookup w/ partial derivatives for LOD */
1381865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1382865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1383865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1384865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1385865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1386865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1387865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                  color );
1388865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1389865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1390865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1391865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXP: /* GL_ARB_fragment_program only */
1392865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texture lookup w/ projective divide */
1393865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1394865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat texcoord[4], color[4], lambda;
1395865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1396865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1397865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1398865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1399865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1400865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1401865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	       /* Not so sure about this test - if texcoord[3] is
1402865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		* zero, we'd probably be fine except for an ASSERT in
1403865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		* IROUND_POS() which gets triggered by the inf values created.
1404865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		*/
1405865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	       if (texcoord[3] != 0.0) {
1406865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		  texcoord[0] /= texcoord[3];
1407865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		  texcoord[1] /= texcoord[3];
1408865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		  texcoord[2] /= texcoord[3];
1409865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	       }
1410865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1411865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1412865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1413865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1414865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1415865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texture lookup w/ projective divide */
1416865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1417865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat texcoord[4], color[4], lambda;
1418865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1419865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1420865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1421865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1422865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1423865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1424865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1425865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		   texcoord[3] != 0.0) {
1426865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  texcoord[0] /= texcoord[3];
1427865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  texcoord[1] /= texcoord[3];
1428865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  texcoord[2] /= texcoord[3];
1429865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1430865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1431865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1432865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1433865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1434865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP2H: /* unpack two 16-bit floats */
1435865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1436865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1437865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1438865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLhalfNV hx, hy;
1439865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1440865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hx = rawBits[0] & 0xffff;
1441865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hy = rawBits[0] >> 16;
1442865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[2] = _mesa_half_to_float(hx);
1443865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = result[3] = _mesa_half_to_float(hy);
1444865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1445865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1446865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1447865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP2US: /* unpack two GLushorts */
1448865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1449865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1450865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1451865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLushort usx, usy;
1452865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1453865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usx = rawBits[0] & 0xffff;
1454865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usy = rawBits[0] >> 16;
1455865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[2] = usx * (1.0f / 65535.0f);
1456865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = result[3] = usy * (1.0f / 65535.0f);
1457865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1458865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1459865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1460865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP4B: /* unpack four GLbytes */
1461865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1462865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1463865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1464865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1465865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1466865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1467865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1468865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1469865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1470865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1471865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1472865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP4UB: /* unpack four GLubytes */
1473865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1474865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1475865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1476865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1477865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1478865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1479865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1480865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1481865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1482865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1483865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1484865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_XPD: /* cross product */
1485865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1486865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1487865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1488865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1489865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[1] * b[2] - a[2] * b[1];
1490865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[2] * b[0] - a[0] * b[2];
1491865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[0] * b[1] - a[1] * b[0];
1492865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = 1.0;
1493865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1494865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1495865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1496865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_X2D: /* 2-D matrix transform */
1497865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1498865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
1499865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1500865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1501865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1502865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1503865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1504865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1505865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1506865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1507865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1508865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1509865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PRINT:
1510865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1511865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File != -1) {
1512865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  GLfloat a[4];
1513865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1514865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1515865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               a[0], a[1], a[2], a[3]);
1516865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1517865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else {
1518865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  _mesa_printf("%s\n", (const char *) inst->Data);
1519865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1520865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1521865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1522865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_END:
1523865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            return GL_TRUE;
1524865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         default:
1525865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1526865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                          inst->Opcode);
1527865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            return GL_TRUE; /* return value doesn't matter */
1528865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1529865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1530865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return GL_TRUE;
1531865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1532865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1533865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1534865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
1535865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Initialize the virtual fragment program machine state prior to running
1536865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * fragment program on a fragment.  This involves initializing the input
1537865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * registers, condition codes, etc.
1538865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param machine  the virtual machine state to init
1539865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param program  the fragment program we're about to run
1540865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param span  the span of pixels we'll operate on
1541865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param col  which element (column) of the span we'll operate on
1542865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
1543865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
1544865f88afc0d59d886fb2ad50429e584ecf17fa81Brianinit_machine( GLcontext *ctx, struct fp_machine *machine,
1545865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              const struct gl_fragment_program *program,
1546865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              const SWspan *span, GLuint col )
1547865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1548865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint inputsRead = program->Base.InputsRead;
1549865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint u, v;
1550865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1551865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (ctx->FragmentProgram.CallbackEnabled)
1552865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      inputsRead = ~0;
1553865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1554865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1555865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Clear temporary registers (undefined for ARB_f_p) */
1556865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      _mesa_bzero(machine->Temporaries,
1557865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1558865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1559865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1560865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* Load input registers */
1561865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1562865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1563865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_Z);
1564865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (span->arrayMask & SPAN_XY) {
1565865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] = (GLfloat) span->array->x[col];
1566865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] = (GLfloat) span->array->y[col];
1567865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1568865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
1569865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] = (GLfloat) span->x + col;
1570865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] = (GLfloat) span->y;
1571865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1572865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1573865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      wpos[3] = span->w + col * span->dwdx;
1574865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1575865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1576865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_RGBA);
1577865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1578865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              span->array->color.sz4.rgba[col]);
1579865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1580865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1581865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_SPEC);
1582865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1583865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              span->array->color.sz4.spec[col]);
1584865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1585865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1586865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1587865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_FOG);
1588865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[0] = span->array->fog[col];
1589865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[1] = 0.0F;
1590865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[2] = 0.0F;
1591865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[3] = 0.0F;
1592865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1593865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1594865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1595865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1596865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1597865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(tex, span->array->texcoords[u][col]);
1598865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1599865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1600865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1601865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (v = 0; v < ctx->Const.MaxVarying; v++) {
1602865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (inputsRead & (1 << (FRAG_ATTRIB_VAR0 + v))) {
1603865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#if 0
1604865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         printf("Frag Var %d: %f %f %f\n", col,
1605865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                span->array->varying[col][v][0],
1606865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                span->array->varying[col][v][1],
1607865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                span->array->varying[col][v][2]);
1608865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#endif
1609865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(machine->Inputs[FRAG_ATTRIB_VAR0 + v],
1610865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 span->array->varying[col][v]);
1611865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1612865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1613865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1614865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* init condition codes */
1615865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[0] = COND_EQ;
1616865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[1] = COND_EQ;
1617865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[2] = COND_EQ;
1618865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[3] = COND_EQ;
1619865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1620865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* init call stack */
1621865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->StackDepth = 0;
1622865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1623865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1624865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1625865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
1626865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1627865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
1628865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
1629865f88afc0d59d886fb2ad50429e584ecf17fa81Brianrun_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1630865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1631865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1632865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   struct fp_machine machine;
1633865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint i;
1634865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1635865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   CurrentMachine = &machine;
1636865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1637865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (i = start; i < end; i++) {
1638865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (span->array->mask[i]) {
1639865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         init_machine(ctx, &machine, program, span, i);
1640865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1641865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (execute_program(ctx, program, ~0, &machine, span, i)) {
1642865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Store result color */
1643865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            COPY_4V(span->array->color.sz4.rgba[i],
1644865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    machine.Outputs[FRAG_RESULT_COLR]);
1645865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1646865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Store result depth/z */
1647865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1648865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1649865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (depth <= 0.0)
1650865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  span->array->z[i] = 0;
1651865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else if (depth >= 1.0)
1652865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1653865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1654865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1655865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1656865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
1657865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         else {
1658865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* killed fragment */
1659865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            span->array->mask[i] = GL_FALSE;
1660865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            span->writeAll = GL_FALSE;
1661865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
1662865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1663865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1664865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1665865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   CurrentMachine = NULL;
1666865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1667865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1668865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1669865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
1670865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Execute the current fragment program for all the fragments
1671865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * in the given span.
1672865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
1673865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvoid
1674865f88afc0d59d886fb2ad50429e584ecf17fa81Brian_swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1675865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1676865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1677865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1678865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* incoming colors should be floats */
1679865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(span->array->ChanType == GL_FLOAT);
1680865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1681865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1682865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1683865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   run_program(ctx, span, 0, span->end);
1684865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1685865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1686865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      span->interpMask &= ~SPAN_Z;
1687865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      span->arrayMask |= SPAN_Z;
1688865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1689865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1690865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ctx->_CurrentProgram = 0;
1691865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1692865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1693