s_fragprog.c revision c968d3d410a1897ecbb41d3557adaef69a4c627a
1865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/*
2865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Mesa 3-D graphics library
3865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Version:  6.5.2
4865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
5865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
6865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
7865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Permission is hereby granted, free of charge, to any person obtaining a
8865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * copy of this software and associated documentation files (the "Software"),
9865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * to deal in the Software without restriction, including without limitation
10865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * and/or sell copies of the Software, and to permit persons to whom the
12865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Software is furnished to do so, subject to the following conditions:
13865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
14865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * The above copyright notice and this permission notice shall be included
15865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * in all copies or substantial portions of the Software.
16865f88afc0d59d886fb2ad50429e584ecf17fa81Brian *
17865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
24865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
25865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "glheader.h"
26865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "colormac.h"
27865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "context.h"
28865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "prog_instruction.h"
29865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "prog_parameter.h"
30865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "prog_print.h"
31865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "program.h"
32865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
33c968d3d410a1897ecbb41d3557adaef69a4c627aBrian#include "s_fragprog.h"
34865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "s_span.h"
35865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
36865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
/*
 * LAMBDA_ZERO: compile-time switch, currently set to 1.
 * NOTE(review): what it controls is explained by comments further down
 * in this file.
 */
#define LAMBDA_ZERO 1

/* DEBUG_FRAG: debug predicate, currently set to 0. */
#define DEBUG_FRAG 0
42865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
43865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
44865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
45865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Virtual machine state used during execution of a fragment programs.
46865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
47865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstruct fp_machine
48865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
49865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat Temporaries[MAX_PROGRAM_TEMPS][4];
50865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat Inputs[FRAG_ATTRIB_MAX][4];
51865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat Outputs[FRAG_RESULT_MAX][4];
52865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint CondCodes[4];  /**< COND_* value for x/y/z/w */
53865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
54865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */
55865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint StackDepth; /**< Index/ptr to top of CallStack[] */
56865f88afc0d59d886fb2ad50429e584ecf17fa81Brian};
57865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
58865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
#if FEATURE_MESA_program_debug
/* Machine whose registers are exposed to the debug query below (NULL = none) */
static struct fp_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Copy the current contents (4 GLfloats) of one fragment program register
 * into \p val.  Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    GL context (not referenced by this function)
 * \param file   register file: PROGRAM_INPUT, PROGRAM_OUTPUT or
 *               PROGRAM_TEMPORARY; anything else reports a problem and
 *               leaves \p val untouched
 * \param index  register number within \p file (not range-checked here)
 * \param val    receives the four register components
 *
 * Does nothing when no machine is currently registered.
 */
void
_swrast_get_program_register(GLcontext *ctx, enum register_file file,
                             GLuint index, GLfloat val[4])
{
   const GLfloat *reg;

   if (!CurrentMachine)
      return;

   if (file == PROGRAM_INPUT) {
      reg = CurrentMachine->Inputs[index];
   }
   else if (file == PROGRAM_OUTPUT) {
      reg = CurrentMachine->Outputs[index];
   }
   else if (file == PROGRAM_TEMPORARY) {
      reg = CurrentMachine->Temporaries[index];
   }
   else {
      _mesa_problem(NULL,
                    "bad register file in _swrast_get_program_register");
      return;
   }

   COPY_4V(val, reg);
}
#endif /* FEATURE_MESA_program_debug */
89865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
90865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
91865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
92865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch a texel.
93865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
94865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
95865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
96865f88afc0d59d886fb2ad50429e584ecf17fa81Brian             GLuint unit, GLfloat color[4] )
97865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
98865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLchan rgba[4];
99865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   SWcontext *swrast = SWRAST_CONTEXT(ctx);
100865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
101865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* XXX use a float-valued TextureSample routine here!!! */
102865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
103865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               1, (const GLfloat (*)[4]) texcoord,
104865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               &lambda, &rgba);
105865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[0] = CHAN_TO_FLOAT(rgba[0]);
106865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[1] = CHAN_TO_FLOAT(rgba[1]);
107865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[2] = CHAN_TO_FLOAT(rgba[2]);
108865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[3] = CHAN_TO_FLOAT(rgba[3]);
109865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
110865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
111865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
112865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
113865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch a texel with the given partial derivatives to compute a level
114865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * of detail in the mipmap.
115865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
116865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
117865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
118865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   const GLfloat texdx[4], const GLfloat texdy[4],
119865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   GLuint unit, GLfloat color[4] )
120865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
121865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   SWcontext *swrast = SWRAST_CONTEXT(ctx);
122865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
123865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
124865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat texW = (GLfloat) texImg->WidthScale;
125865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat texH = (GLfloat) texImg->HeightScale;
126865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLchan rgba[4];
127865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
128865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
129865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texdx[1], texdy[1], /* dt/dx, dt/dy */
130865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texdx[3], texdy[2], /* dq/dx, dq/dy */
131865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texW, texH,
132865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         texcoord[0], texcoord[1], texcoord[3],
133865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                         1.0F / texcoord[3]);
134865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
135865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
136865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               1, (const GLfloat (*)[4]) texcoord,
137865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               &lambda, &rgba);
138865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[0] = CHAN_TO_FLOAT(rgba[0]);
139865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[1] = CHAN_TO_FLOAT(rgba[1]);
140865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[2] = CHAN_TO_FLOAT(rgba[2]);
141865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   color[3] = CHAN_TO_FLOAT(rgba[3]);
142865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
143865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
144865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
145865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
146865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Return a pointer to the 4-element float vector specified by the given
147865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * source register.
148865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
149865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic INLINE const GLfloat *
150865f88afc0d59d886fb2ad50429e584ecf17fa81Brianget_register_pointer( GLcontext *ctx,
151865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      const struct prog_src_register *source,
152865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      const struct fp_machine *machine,
153865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      const struct gl_fragment_program *program )
154865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
155865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (source->File) {
156865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_TEMPORARY:
157865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < MAX_PROGRAM_TEMPS);
158865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return machine->Temporaries[source->Index];
159865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_INPUT:
160865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < FRAG_ATTRIB_MAX);
161865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return machine->Inputs[source->Index];
162865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_OUTPUT:
163865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* This is only for PRINT */
164865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < FRAG_RESULT_MAX);
165865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return machine->Outputs[source->Index];
166865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_LOCAL_PARAM:
167865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
168865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return program->Base.LocalParams[source->Index];
169865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_ENV_PARAM:
170865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
171865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return ctx->FragmentProgram.Parameters[source->Index];
172865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_STATE_VAR:
173865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Fallthrough */
174865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_CONSTANT:
175865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Fallthrough */
176865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_UNIFORM:
177865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Fallthrough */
178865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case PROGRAM_NAMED_PARAM:
179865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
180865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return program->Base.Parameters->ParameterValues[source->Index];
181865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   default:
182865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      _mesa_problem(ctx, "Invalid input register file %d in fp "
183865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    "get_register_pointer", source->File);
184865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return NULL;
185865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
186865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
187865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
188865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
189865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
190865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch a 4-element float vector from the given source register.
191865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Apply swizzling and negating as needed.
192865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
193865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
194865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_vector4( GLcontext *ctx,
195865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct prog_src_register *source,
196865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct fp_machine *machine,
197865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct gl_fragment_program *program,
198865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4] )
199865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
200865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat *src = get_register_pointer(ctx, source, machine, program);
201865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(src);
202865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
203865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
204865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                        SWIZZLE_Z, SWIZZLE_W)) {
205865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* no swizzling */
206865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      COPY_4V(result, src);
207865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
208865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   else {
209865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = src[GET_SWZ(source->Swizzle, 0)];
210865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = src[GET_SWZ(source->Swizzle, 1)];
211865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = src[GET_SWZ(source->Swizzle, 2)];
212865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = src[GET_SWZ(source->Swizzle, 3)];
213865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
214865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
215865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateBase) {
216865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
217865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
218865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
219865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
220865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
221865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Abs) {
222865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = FABSF(result[0]);
223865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = FABSF(result[1]);
224865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = FABSF(result[2]);
225865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = FABSF(result[3]);
226865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
227865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateAbs) {
228865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
229865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
230865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
231865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
232865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
233865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
234865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
235865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
236865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
237865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Fetch the derivative with respect to X for the given register.
238865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \return GL_TRUE if it was easily computed or GL_FALSE if we
239865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * need to execute another instance of the program (ugh)!
240865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
241865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic GLboolean
242865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_vector4_deriv( GLcontext *ctx,
243865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     const struct prog_src_register *source,
244865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     const SWspan *span,
245865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     char xOrY, GLint column, GLfloat result[4] )
246865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
247865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat src[4];
248865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
249865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(xOrY == 'X' || xOrY == 'Y');
250865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
251865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (source->Index) {
252865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_WPOS:
253865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
254865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = 1.0;
255865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 0.0;
256865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
257865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dwdx;
258865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
259865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
260865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = 0.0;
261865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 1.0;
262865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
263865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dwdy;
264865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
265865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
266865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_COL0:
267865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
268865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->drdx * (1.0F / CHAN_MAXF);
269865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dgdx * (1.0F / CHAN_MAXF);
270865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dbdx * (1.0F / CHAN_MAXF);
271865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dadx * (1.0F / CHAN_MAXF);
272865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
273865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
274865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->drdy * (1.0F / CHAN_MAXF);
275865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dgdy * (1.0F / CHAN_MAXF);
276865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dbdy * (1.0F / CHAN_MAXF);
277865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->dady * (1.0F / CHAN_MAXF);
278865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
279865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
280865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_COL1:
281865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
282865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
283865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
284865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
285865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0; /* XXX need this */
286865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
287865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
288865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
289865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
290865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
291865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0; /* XXX need this */
292865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
293865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
294865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_FOGC:
295865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
296865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dfogdx;
297865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 0.0;
298865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = 0.0;
299865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0;
300865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
301865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
302865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->dfogdy;
303865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = 0.0;
304865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = 0.0;
305865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = 0.0;
306865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
307865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
308865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX0:
309865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX1:
310865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX2:
311865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX3:
312865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX4:
313865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX5:
314865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX6:
315865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case FRAG_ATTRIB_TEX7:
316865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
317865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
318865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* this is a little tricky - I think I've got it right */
319865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLfloat invQ = 1.0f / (span->tex[u][3]
320865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                      + span->texStepX[u][3] * column);
321865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->texStepX[u][0] * invQ;
322865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->texStepX[u][1] * invQ;
323865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->texStepX[u][2] * invQ;
324865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->texStepX[u][3] * invQ;
325865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
326865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
327865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
328865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* Tricky, as above, but in Y direction */
329865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
330865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[0] = span->texStepY[u][0] * invQ;
331865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[1] = span->texStepY[u][1] * invQ;
332865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[2] = span->texStepY[u][2] * invQ;
333865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         src[3] = span->texStepY[u][3] * invQ;
334865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
335865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      break;
336865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   default:
337865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return GL_FALSE;
338865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
339865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
340865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[0] = src[GET_SWZ(source->Swizzle, 0)];
341865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[1] = src[GET_SWZ(source->Swizzle, 1)];
342865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[2] = src[GET_SWZ(source->Swizzle, 2)];
343865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[3] = src[GET_SWZ(source->Swizzle, 3)];
344865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
345865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateBase) {
346865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
347865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
348865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
349865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
350865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
351865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Abs) {
352865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = FABSF(result[0]);
353865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = FABSF(result[1]);
354865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = FABSF(result[2]);
355865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = FABSF(result[3]);
356865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
357865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateAbs) {
358865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
359865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[1] = -result[1];
360865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[2] = -result[2];
361865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[3] = -result[3];
362865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
363865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return GL_TRUE;
364865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
365865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
366865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
367865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
368865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * As above, but only return result[0] element.
369865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
370865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
371865f88afc0d59d886fb2ad50429e584ecf17fa81Brianfetch_vector1( GLcontext *ctx,
372865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct prog_src_register *source,
373865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct fp_machine *machine,
374865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct gl_fragment_program *program,
375865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4] )
376865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
377865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLfloat *src = get_register_pointer(ctx, source, machine, program);
378865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(src);
379865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
380865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   result[0] = src[GET_SWZ(source->Swizzle, 0)];
381865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
382865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateBase) {
383865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
384865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
385865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->Abs) {
386865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = FABSF(result[0]);
387865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
388865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (source->NegateAbs) {
389865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      result[0] = -result[0];
390865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
391865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
392865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
393865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
394865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
395865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Test value against zero and return GT, LT, EQ or UN if NaN.
396865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
397865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic INLINE GLuint
398865f88afc0d59d886fb2ad50429e584ecf17fa81Briangenerate_cc( float value )
399865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
400865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value != value)
401865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return COND_UN;  /* NaN */
402865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value > 0.0F)
403865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return COND_GT;
404865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value < 0.0F)
405865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return COND_LT;
406865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return COND_EQ;
407865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
408865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
409865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
410865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
411865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Test if the ccMaskRule is satisfied by the given condition code.
412865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Used to mask destination writes according to the current condition code.
413865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
414865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic INLINE GLboolean
415865f88afc0d59d886fb2ad50429e584ecf17fa81Briantest_cc(GLuint condCode, GLuint ccMaskRule)
416865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
417865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (ccMaskRule) {
418865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_EQ: return (condCode == COND_EQ);
419865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_NE: return (condCode != COND_EQ);
420865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_LT: return (condCode == COND_LT);
421865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
422865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
423865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_GT: return (condCode == COND_GT);
424865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_TR: return GL_TRUE;
425865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   case COND_FL: return GL_FALSE;
426865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   default:      return GL_TRUE;
427865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
428865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
429865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
430865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
431865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
432865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Store 4 floats into a register.  Observe the instructions saturate and
433865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * set-condition-code flags.
434865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
435865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
436865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstore_vector4( const struct prog_instruction *inst,
437865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               struct fp_machine *machine,
438865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat value[4] )
439865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
440865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct prog_dst_register *dest = &(inst->DstReg);
441865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
442865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat *dstReg;
443865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat dummyReg[4];
444865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLfloat clampedValue[4];
445865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint writeMask = dest->WriteMask;
446865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
447865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   switch (dest->File) {
448865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_OUTPUT:
449865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         dstReg = machine->Outputs[dest->Index];
450865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
451865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_TEMPORARY:
452865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         dstReg = machine->Temporaries[dest->Index];
453865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         break;
454865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      case PROGRAM_WRITE_ONLY:
455865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         dstReg = dummyReg;
456865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         return;
457865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      default:
458865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         _mesa_problem(NULL, "bad register file in store_vector4(fp)");
459865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         return;
460865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
461865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
462865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#if 0
463865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (value[0] > 1.0e10 ||
464865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[0]) ||
465865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[1]) ||
466865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[2]) ||
467865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       IS_INF_OR_NAN(value[3])  )
468865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
469865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#endif
470865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
471865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (clamp) {
472865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
473865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
474865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
475865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
476865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      value = clampedValue;
477865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
478865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
479865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (dest->CondMask != COND_TR) {
480865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* condition codes may turn off some writes */
481865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_X) {
482865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
483865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
484865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_X;
485865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
486865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Y) {
487865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
488865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
489865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_Y;
490865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
491865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Z) {
492865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
493865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
494865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_Z;
495865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
496865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_W) {
497865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
498865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                      dest->CondMask))
499865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            writeMask &= ~WRITEMASK_W;
500865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
501865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
502865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
503865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_X)
504865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[0] = value[0];
505865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_Y)
506865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[1] = value[1];
507865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_Z)
508865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[2] = value[2];
509865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (writeMask & WRITEMASK_W)
510865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      dstReg[3] = value[3];
511865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
512865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inst->CondUpdate) {
513865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_X)
514865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[0] = generate_cc(value[0]);
515865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Y)
516865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[1] = generate_cc(value[1]);
517865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_Z)
518865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[2] = generate_cc(value[2]);
519865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (writeMask & WRITEMASK_W)
520865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         machine->CondCodes[3] = generate_cc(value[3]);
521865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
522865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
523865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
524865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
525865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
526865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Initialize a new machine state instance from an existing one, adding
527865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * the partial derivatives onto the input registers.
528865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Used to implement DDX and DDY instructions in non-trivial cases.
529865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
530865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
531865f88afc0d59d886fb2ad50429e584ecf17fa81Brianinit_machine_deriv( GLcontext *ctx,
532865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    const struct fp_machine *machine,
533865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    const struct gl_fragment_program *program,
534865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    const SWspan *span, char xOrY,
535865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    struct fp_machine *dMachine )
536865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
537865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint u, v;
538865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
539865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(xOrY == 'X' || xOrY == 'Y');
540865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
541865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* copy existing machine */
542865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
543865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
544865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
545865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Clear temporary registers (undefined for ARB_f_p) */
546865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      _mesa_bzero( (void*) machine->Temporaries,
547865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
548865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
549865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
550865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* Add derivatives */
551865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
552865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
553865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
554865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] += 1.0F;
555865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] += 0.0F;
556865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[2] += span->dzdx;
557865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[3] += span->dwdx;
558865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
559865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
560865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] += 0.0F;
561865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] += 1.0F;
562865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[2] += span->dzdy;
563865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[3] += span->dwdy;
564865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
565865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
566865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
567865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
568865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
569865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[0] += span->drdx * (1.0F / CHAN_MAXF);
570865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
571865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
572865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[3] += span->dadx * (1.0F / CHAN_MAXF);
573865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
574865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
575865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[0] += span->drdy * (1.0F / CHAN_MAXF);
576865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
577865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
578865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col0[3] += span->dady * (1.0F / CHAN_MAXF);
579865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
580865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
581865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
582865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
583865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
584865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
585865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
586865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
587865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[3] += 0.0; /*XXX fix */
588865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
589865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
590865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
591865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
592865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
593865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         col1[3] += 0.0; /*XXX fix */
594865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
595865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
596865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
597865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
598865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (xOrY == 'X') {
599865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         fogc[0] += span->dfogdx;
600865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
601865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
602865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         fogc[0] += span->dfogdy;
603865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
604865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
605865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
606865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
607865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
608865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* XXX perspective-correct interpolation */
609865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (xOrY == 'X') {
610865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[0] += span->texStepX[u][0];
611865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[1] += span->texStepX[u][1];
612865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[2] += span->texStepX[u][2];
613865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[3] += span->texStepX[u][3];
614865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
615865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         else {
616865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[0] += span->texStepY[u][0];
617865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[1] += span->texStepY[u][1];
618865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[2] += span->texStepY[u][2];
619865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            tex[3] += span->texStepY[u][3];
620865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
621865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
622865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
623865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
624865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (v = 0; v < ctx->Const.MaxVarying; v++) {
625865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_VAR0 + v))) {
626865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         GLfloat *var = (GLfloat*) machine->Inputs[FRAG_ATTRIB_VAR0 + v];
627865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /* XXXX finish this */
628865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[0] += span->varStepX[v][0];
629865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[1] += span->varStepX[v][1];
630865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[2] += span->varStepX[v][2];
631865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         var[3] += span->varStepX[v][3];
632865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
633865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
634865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
635865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* init condition codes */
636865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[0] = COND_EQ;
637865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[1] = COND_EQ;
638865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[2] = COND_EQ;
639865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   dMachine->CondCodes[3] = COND_EQ;
640865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
641865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
642865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
643865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
644865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Execute the given fragment program.
645865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * NOTE: we do everything in single-precision floating point; we don't
646865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * currently observe the single/half/fixed-precision qualifiers.
647865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param ctx - rendering context
648865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param program - the fragment program to execute
649865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param machine - machine state (register file)
650865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param maxInst - max number of instructions to execute
651865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
652865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
653865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic GLboolean
654865f88afc0d59d886fb2ad50429e584ecf17fa81Brianexecute_program( GLcontext *ctx,
655865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 const struct gl_fragment_program *program, GLuint maxInst,
656865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 struct fp_machine *machine, const SWspan *span,
657865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 GLuint column )
658865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
659865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint pc;
660865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
661865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (DEBUG_FRAG) {
662865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      printf("execute fragment program --------------------\n");
663865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
664865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
665865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (pc = 0; pc < maxInst; pc++) {
666865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      const struct prog_instruction *inst = program->Base.Instructions + pc;
667865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
668865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (ctx->FragmentProgram.CallbackEnabled &&
669865f88afc0d59d886fb2ad50429e584ecf17fa81Brian          ctx->FragmentProgram.Callback) {
670865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         ctx->FragmentProgram.CurrentPosition = inst->StringPos;
671865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         ctx->FragmentProgram.Callback(program->Base.Target,
672865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                       ctx->FragmentProgram.CallbackData);
673865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
674865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
675865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (DEBUG_FRAG) {
676865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         _mesa_print_instruction(inst);
677865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
678865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
679865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      switch (inst->Opcode) {
680865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_ABS:
681865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
682865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
683865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
684865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = FABSF(a[0]);
685865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = FABSF(a[1]);
686865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = FABSF(a[2]);
687865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = FABSF(a[3]);
688865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
689865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
690865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
691865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_ADD:
692865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
693865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
694865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
695865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
696865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] + b[0];
697865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] + b[1];
698865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] + b[2];
699865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] + b[3];
700865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
701865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
702865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
703865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
704865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
705865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
706865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
707865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
708865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
709865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_BRA: /* conditional branch */
710865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
711865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* NOTE: The return is conditional! */
712865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
713865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
714865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
715865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
716865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
717865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
718865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  /* take branch */
719865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  pc = inst->BranchTarget;
720865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
721865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
722865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
723865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_CAL: /* Call subroutine */
724865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
725865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* NOTE: The call is conditional! */
726865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
727865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
728865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
729865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
730865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
731865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
732865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
733865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
734865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  }
735865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  machine->CallStack[machine->StackDepth++] = pc + 1;
736865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  pc = inst->BranchTarget;
737865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
738865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
739865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
740865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_CMP:
741865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
742865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
743865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
744865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
745865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
746865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] < 0.0F ? b[0] : c[0];
747865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] < 0.0F ? b[1] : c[1];
748865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] < 0.0F ? b[2] : c[2];
749865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] < 0.0F ? b[3] : c[3];
750865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
751865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
752865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
753865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_COS:
754865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
755865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
756865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
757865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3]
758865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = (GLfloat) _mesa_cos(a[0]);
759865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
760865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
761865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
762865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DDX: /* Partial derivative with respect to X */
763865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
764865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], aNext[4], result[4];
765865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               struct fp_machine dMachine;
766865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
767865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                        column, result)) {
768865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  /* This is tricky.  Make a copy of the current machine state,
769865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * increment the input registers by the dx or dy partial
770865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * derivatives, then re-execute the program up to the
771865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * preceeding instruction, then fetch the source register.
772865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * Finally, find the difference in the register values for
773865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   * the original and derivative runs.
774865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   */
775865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
776865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  init_machine_deriv(ctx, machine, program, span,
777865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                     'X', &dMachine);
778865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  execute_program(ctx, program, pc, &dMachine, span, column);
779865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
780865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[0] = aNext[0] - a[0];
781865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[1] = aNext[1] - a[1];
782865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[2] = aNext[2] - a[2];
783865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[3] = aNext[3] - a[3];
784865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
785865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
786865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
787865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
788865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DDY: /* Partial derivative with respect to Y */
789865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
790865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], aNext[4], result[4];
791865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               struct fp_machine dMachine;
792865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
793865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                        column, result)) {
794865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  init_machine_deriv(ctx, machine, program, span,
795865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                     'Y', &dMachine);
796865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
797865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  execute_program(ctx, program, pc, &dMachine, span, column);
798865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
799865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[0] = aNext[0] - a[0];
800865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[1] = aNext[1] - a[1];
801865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[2] = aNext[2] - a[2];
802865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[3] = aNext[3] - a[3];
803865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
804865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
805865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
806865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
807865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DP3:
808865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
809865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
810865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
811865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
812865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
813865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
814865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
815865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
816865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
817865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
818865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
819865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
820865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DP4:
821865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
822865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
823865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
824865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
825865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
826865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
827865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
828865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
829865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], a[0], a[1], a[2], a[3],
830865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
831865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
832865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
833865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
834865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DPH:
835865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
836865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
837865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
838865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
839865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] =
840865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
841865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
842865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
843865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
844865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_DST: /* Distance vector */
845865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
846865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
847865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
848865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
849865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = 1.0F;
850865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1];
851865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2];
852865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = b[3];
853865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
854865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
855865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
856865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_EX2: /* Exponential base 2 */
857865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
858865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
859865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
860865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] =
861865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  (GLfloat) _mesa_pow(2.0, a[0]);
862865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
863865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
864865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
865865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_FLR:
866865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
867865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
868865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
869865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = FLOORF(a[0]);
870865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = FLOORF(a[1]);
871865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = FLOORF(a[2]);
872865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = FLOORF(a[3]);
873865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
874865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
875865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
876865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_FRC:
877865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
878865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
879865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
880865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] - FLOORF(a[0]);
881865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] - FLOORF(a[1]);
882865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] - FLOORF(a[2]);
883865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] - FLOORF(a[3]);
884865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
885865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
886865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
887865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_KIL_NV: /* NV_f_p only */
888865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
889865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
890865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
891865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
892865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
893865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
894865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
895865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  return GL_FALSE;
896865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
897865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
898865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
899865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_KIL: /* ARB_f_p only */
900865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
901865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4];
902865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
903865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
904865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  return GL_FALSE;
905865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
906865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
907865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
908865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_LG2:  /* log base 2 */
909865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
910865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
911865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
912865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
913865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
914865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
915865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
916865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_LIT:
917865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
918865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
919865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
920865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
921865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = MAX2(a[0], 0.0F);
922865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = MAX2(a[1], 0.0F);
923865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* XXX ARB version clamps a[3], NV version doesn't */
924865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
925865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = 1.0F;
926865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[0];
927865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* XXX we could probably just use pow() here */
928865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (a[0] > 0.0F) {
929865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (a[1] == 0.0 && a[3] == 0.0)
930865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[2] = 1.0;
931865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else
932865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[2] = EXPF(a[3] * LOGF(a[1]));
933865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
934865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else {
935865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  result[2] = 0.0;
936865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
937865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = 1.0F;
938865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
939865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
940865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
941865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
942865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3]);
943865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
944865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
945865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
946865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_LRP:
947865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
948865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
949865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
950865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
951865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
952865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
953865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
954865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
955865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
956865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
957865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
958865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("LRP (%g %g %g %g) = (%g %g %g %g), "
959865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         "(%g %g %g %g), (%g %g %g %g)\n",
960865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
961865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
962865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3],
963865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         c[0], c[1], c[2], c[3]);
964865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
965865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
966865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
967865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MAD:
968865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
969865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
970865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
971865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
972865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
973865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] * b[0] + c[0];
974865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1] + c[1];
975865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] * b[2] + c[2];
976865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] * b[3] + c[3];
977865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
978865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
979865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
980865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         "(%g %g %g %g) + (%g %g %g %g)\n",
981865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
982865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
983865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3],
984865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         c[0], c[1], c[2], c[3]);
985865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
986865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
987865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
988865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MAX:
989865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
990865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
991865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
992865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
993865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = MAX2(a[0], b[0]);
994865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = MAX2(a[1], b[1]);
995865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = MAX2(a[2], b[2]);
996865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = MAX2(a[3], b[3]);
997865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
998865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
999865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1000865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
1001865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
1002865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
1003865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1004865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1005865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1006865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MIN:
1007865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1008865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1009865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1010865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1011865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = MIN2(a[0], b[0]);
1012865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = MIN2(a[1], b[1]);
1013865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = MIN2(a[2], b[2]);
1014865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = MIN2(a[3], b[3]);
1015865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1016865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1017865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1018865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MOV:
1019865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1020865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4];
1021865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
1022865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1023865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1024865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MOV (%g %g %g %g)\n",
1025865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3]);
1026865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1027865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1028865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1029865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_MUL:
1030865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1031865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1032865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1033865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1034865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] * b[0];
1035865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] * b[1];
1036865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] * b[2];
1037865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] * b[3];
1038865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1039865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1040865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1041865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
1042865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3],
1043865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         b[0], b[1], b[2], b[3]);
1044865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1045865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1046865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1047865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1048865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1049865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1050865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLhalfNV hx, hy;
1051865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint *rawResult = (GLuint *) result;
1052865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint twoHalves;
1053865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1054865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hx = _mesa_float_to_half(a[0]);
1055865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hy = _mesa_float_to_half(a[1]);
1056865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               twoHalves = hx | (hy << 16);
1057865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1058865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = twoHalves;
1059865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1060865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1061865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1062865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1063865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1064865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1065865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint usx, usy, *rawResult = (GLuint *) result;
1066865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1067865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = CLAMP(a[0], 0.0F, 1.0F);
1068865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = CLAMP(a[1], 0.0F, 1.0F);
1069865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usx = IROUND(a[0] * 65535.0F);
1070865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usy = IROUND(a[1] * 65535.0F);
1071865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1072865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = usx | (usy << 16);
1073865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1074865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1075865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1076865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1077865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1078865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1079865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1080865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1081865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1082865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1083865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1084865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1085865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubx = IROUND(127.0F * a[0] + 128.0F);
1086865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               uby = IROUND(127.0F * a[1] + 128.0F);
1087865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubz = IROUND(127.0F * a[2] + 128.0F);
1088865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubw = IROUND(127.0F * a[3] + 128.0F);
1089865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1090865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1091865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1092865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1093865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1094865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1095865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1096865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1097865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1098865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1099865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = CLAMP(a[0], 0.0F, 1.0F);
1100865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[1] = CLAMP(a[1], 0.0F, 1.0F);
1101865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[2] = CLAMP(a[2], 0.0F, 1.0F);
1102865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[3] = CLAMP(a[3], 0.0F, 1.0F);
1103865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubx = IROUND(255.0F * a[0]);
1104865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               uby = IROUND(255.0F * a[1]);
1105865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubz = IROUND(255.0F * a[2]);
1106865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               ubw = IROUND(255.0F * a[3]);
1107865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1108865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1109865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1110865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1111865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1112865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_POW:
1113865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1114865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1115865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1116865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1117865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3]
1118865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = (GLfloat)_mesa_pow(a[0], b[0]);
1119865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1120865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1121865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1122865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RCP:
1123865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1124865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1125865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1126865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1127865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (a[0] == 0)
1128865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     printf("RCP(0)\n");
1129865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else if (IS_INF_OR_NAN(a[0]))
1130865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     printf("RCP(inf)\n");
1131865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1132865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1133865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1134865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1135865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1136865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RET: /* return from subroutine */
1137865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1138865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* NOTE: The return is conditional! */
1139865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint swizzle = inst->DstReg.CondSwizzle;
1140865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint condMask = inst->DstReg.CondMask;
1141865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
1142865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
1143865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
1144865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
1145865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (machine->StackDepth == 0) {
1146865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1147865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  }
1148865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  pc = machine->CallStack[--machine->StackDepth];
1149865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1150865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1151865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1152865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RFL: /* reflection vector */
1153865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1154865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1155865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1156865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1157865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               tmpW = DOT3(axis, axis);
1158865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1159865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = tmpX * axis[0] - dir[0];
1160865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = tmpX * axis[1] - dir[1];
1161865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = tmpX * axis[2] - dir[2];
1162865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* result[3] is never written! XXX enforce in parser! */
1163865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1164865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1165865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1166865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_RSQ: /* 1 / sqrt() */
1167865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1168865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1169865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1170865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               a[0] = FABSF(a[0]);
1171865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1172865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1173865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1174865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1175865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1176865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1177865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1178865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SCS: /* sine and cos */
1179865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1180865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1181865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1182865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (GLfloat)_mesa_cos(a[0]);
1183865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (GLfloat)_mesa_sin(a[0]);
1184865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = 0.0;  /* undefined! */
1185865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = 0.0;  /* undefined! */
1186865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1187865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1188865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1189865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SEQ: /* set on equal */
1190865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1191865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1192865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1193865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1194865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1195865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1196865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1197865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1198865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1199865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1200865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1201865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SFL: /* set false, operands ignored */
1202865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1203865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1204865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1205865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1206865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1207865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SGE: /* set on greater or equal */
1208865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1209865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1210865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1211865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1212865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1213865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1214865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1215865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1216865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1217865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1218865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1219865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SGT: /* set on greater */
1220865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1221865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1222865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1223865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1224865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1225865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1226865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1227865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1228865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1229865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1230865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1231865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SIN:
1232865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1233865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1234865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1235865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[1] = result[2] = result[3]
1236865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  = (GLfloat) _mesa_sin(a[0]);
1237865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1238865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1239865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1240865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SLE: /* set on less or equal */
1241865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1242865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1243865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1244865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1245865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1246865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1247865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1248865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1249865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1250865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1251865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1252865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SLT: /* set on less */
1253865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1254865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1255865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1256865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1257865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1258865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1259865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1260865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1261865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1262865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1263865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1264865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SNE: /* set on not equal */
1265865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1266865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1267865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1268865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1269865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1270865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1271865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1272865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1273865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1274865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1275865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1276865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_STR: /* set true, operands ignored */
1277865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1278865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1279865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1280865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1281865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1282865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SUB:
1283865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1284865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1285865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1286865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1287865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] - b[0];
1288865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] - b[1];
1289865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] - b[2];
1290865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] - b[3];
1291865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1292865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1293865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1294865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         result[0], result[1], result[2], result[3],
1295865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1296865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1297865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1298865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1299865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_SWZ: /* extended swizzle */
1300865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1301865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const struct prog_src_register *source = &inst->SrcReg[0];
1302865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat *src = get_register_pointer(ctx, source,
1303865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                                         machine, program);
1304865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat result[4];
1305865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLuint i;
1306865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               for (i = 0; i < 4; i++) {
1307865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  const GLuint swz = GET_SWZ(source->Swizzle, i);
1308865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (swz == SWIZZLE_ZERO)
1309865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = 0.0;
1310865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else if (swz == SWIZZLE_ONE)
1311865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = 1.0;
1312865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  else {
1313865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     ASSERT(swz >= 0);
1314865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     ASSERT(swz <= 3);
1315865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = src[swz];
1316865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  }
1317865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  if (source->NegateBase & (1 << i))
1318865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                     result[i] = -result[i];
1319865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1320865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1321865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1322865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1323865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TEX: /* Both ARB and NV frag prog */
1324865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texel lookup */
1325865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1326865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* Note: only use the precomputed lambda value when we're
1327865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * sampling texture unit [K] with texcoord[K].
1328865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * Otherwise, the lambda value may have no relation to the
1329865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * instruction's texcoord or texture image.  Using the wrong
1330865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * lambda is usually bad news.
1331865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * The rest of the time, just use zero (until we get a more
1332865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                * sophisticated way of computing lambda).
1333865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                */
1334865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat coord[4], color[4], lambda;
1335865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1336865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1337865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1338865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1339865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1340865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1341865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel( ctx, coord, lambda, inst->TexSrcUnit, color );
1342865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (DEBUG_FRAG) {
1343865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1344865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         "lod %f\n",
1345865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         color[0], color[1], color[2], color[3],
1346865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         inst->TexSrcUnit,
1347865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                         coord[0], coord[1], coord[2], coord[3], lambda);
1348865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1349865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1350865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1351865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1352865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXB: /* GL_ARB_fragment_program only */
1353865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texel lookup with LOD bias */
1354865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1355865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat coord[4], color[4], lambda, bias;
1356865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1357865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1358865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1359865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1360865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1361865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1362865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               /* coord[3] is the bias to add to lambda */
1363865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1364865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1365865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    + coord[3];
1366865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel(ctx, coord, lambda + bias, inst->TexSrcUnit, color);
1367865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1368865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1369865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1370865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXD: /* GL_NV_fragment_program only */
1371865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texture lookup w/ partial derivatives for LOD */
1372865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1373865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1374865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1375865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1376865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1377865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1378865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                                  color );
1379865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1380865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1381865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1382865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXP: /* GL_ARB_fragment_program only */
1383865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texture lookup w/ projective divide */
1384865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1385865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat texcoord[4], color[4], lambda;
1386865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1387865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1388865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1389865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1390865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1391865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1392865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	       /* Not so sure about this test - if texcoord[3] is
1393865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		* zero, we'd probably be fine except for an ASSERT in
1394865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		* IROUND_POS() which gets triggered by the inf values created.
1395865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		*/
1396865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	       if (texcoord[3] != 0.0) {
1397865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		  texcoord[0] /= texcoord[3];
1398865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		  texcoord[1] /= texcoord[3];
1399865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		  texcoord[2] /= texcoord[3];
1400865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	       }
1401865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1402865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1403865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1404865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1405865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1406865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Texture lookup w/ projective divide */
1407865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1408865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat texcoord[4], color[4], lambda;
1409865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1410865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                   inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1411865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = span->array->lambda[inst->TexSrcUnit][column];
1412865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1413865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  lambda = 0.0;
1414865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1415865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1416865f88afc0d59d886fb2ad50429e584ecf17fa81Brian		   texcoord[3] != 0.0) {
1417865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  texcoord[0] /= texcoord[3];
1418865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  texcoord[1] /= texcoord[3];
1419865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  texcoord[2] /= texcoord[3];
1420865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1421865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1422865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, color );
1423865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1424865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1425865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP2H: /* unpack two 16-bit floats */
1426865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1427865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1428865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1429865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLhalfNV hx, hy;
1430865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1431865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hx = rawBits[0] & 0xffff;
1432865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               hy = rawBits[0] >> 16;
1433865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[2] = _mesa_half_to_float(hx);
1434865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = result[3] = _mesa_half_to_float(hy);
1435865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1436865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1437865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1438865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP2US: /* unpack two GLushorts */
1439865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1440865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1441865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1442865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLushort usx, usy;
1443865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1444865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usx = rawBits[0] & 0xffff;
1445865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               usy = rawBits[0] >> 16;
1446865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = result[2] = usx * (1.0f / 65535.0f);
1447865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = result[3] = usy * (1.0f / 65535.0f);
1448865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1449865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1450865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1451865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP4B: /* unpack four GLbytes */
1452865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1453865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1454865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1455865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1456865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1457865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1458865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1459865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1460865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1461865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1462865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1463865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_UP4UB: /* unpack four GLubytes */
1464865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1465865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], result[4];
1466865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLuint *rawBits = (const GLuint *) a;
1467865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1468865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1469865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1470865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1471865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1472865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1473865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1474865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1475865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_XPD: /* cross product */
1476865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1477865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], result[4];
1478865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1479865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1480865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[1] * b[2] - a[2] * b[1];
1481865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[2] * b[0] - a[0] * b[2];
1482865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[0] * b[1] - a[1] * b[0];
1483865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = 1.0;
1484865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1485865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1486865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1487865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_X2D: /* 2-D matrix transform */
1488865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1489865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               GLfloat a[4], b[4], c[4], result[4];
1490865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1491865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1492865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1493865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1494865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1495865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1496865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1497865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               store_vector4( inst, machine, result );
1498865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1499865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1500865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_PRINT:
1501865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            {
1502865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (inst->SrcReg[0].File != -1) {
1503865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  GLfloat a[4];
1504865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1505865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1506865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                               a[0], a[1], a[2], a[3]);
1507865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1508865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else {
1509865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  _mesa_printf("%s\n", (const char *) inst->Data);
1510865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               }
1511865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1512865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            break;
1513865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         case OPCODE_END:
1514865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            return GL_TRUE;
1515865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         default:
1516865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1517865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                          inst->Opcode);
1518865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            return GL_TRUE; /* return value doesn't matter */
1519865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1520865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1521865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return GL_TRUE;
1522865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1523865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1524865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1525865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
1526865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Initialize the virtual fragment program machine state prior to running
1527865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * fragment program on a fragment.  This involves initializing the input
1528865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * registers, condition codes, etc.
1529865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param machine  the virtual machine state to init
1530865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param program  the fragment program we're about to run
1531865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param span  the span of pixels we'll operate on
1532865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * \param col  which element (column) of the span we'll operate on
1533865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
1534865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
1535865f88afc0d59d886fb2ad50429e584ecf17fa81Brianinit_machine( GLcontext *ctx, struct fp_machine *machine,
1536865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              const struct gl_fragment_program *program,
1537865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              const SWspan *span, GLuint col )
1538865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1539865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint inputsRead = program->Base.InputsRead;
1540865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint u, v;
1541865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1542865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (ctx->FragmentProgram.CallbackEnabled)
1543865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      inputsRead = ~0;
1544865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1545865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1546865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Clear temporary registers (undefined for ARB_f_p) */
1547865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      _mesa_bzero(machine->Temporaries,
1548865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1549865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1550865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1551865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* Load input registers */
1552865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1553865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1554865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_Z);
1555865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (span->arrayMask & SPAN_XY) {
1556865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] = (GLfloat) span->array->x[col];
1557865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] = (GLfloat) span->array->y[col];
1558865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1559865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      else {
1560865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[0] = (GLfloat) span->x + col;
1561865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         wpos[1] = (GLfloat) span->y;
1562865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1563865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1564865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      wpos[3] = span->w + col * span->dwdx;
1565865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1566865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1567865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_RGBA);
1568865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1569865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              span->array->color.sz4.rgba[col]);
1570865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1571865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1572865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_SPEC);
1573865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1574865f88afc0d59d886fb2ad50429e584ecf17fa81Brian              span->array->color.sz4.spec[col]);
1575865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1576865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1577865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1578865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ASSERT(span->arrayMask & SPAN_FOG);
1579865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[0] = span->array->fog[col];
1580865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[1] = 0.0F;
1581865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[2] = 0.0F;
1582865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fogc[3] = 0.0F;
1583865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1584865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1585865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1586865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1587865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1588865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(tex, span->array->texcoords[u][col]);
1589865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1590865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1591865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1592865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (v = 0; v < ctx->Const.MaxVarying; v++) {
1593865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (inputsRead & (1 << (FRAG_ATTRIB_VAR0 + v))) {
1594865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#if 0
1595865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         printf("Frag Var %d: %f %f %f\n", col,
1596865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                span->array->varying[col][v][0],
1597865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                span->array->varying[col][v][1],
1598865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                span->array->varying[col][v][2]);
1599865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#endif
1600865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         COPY_4V(machine->Inputs[FRAG_ATTRIB_VAR0 + v],
1601865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                 span->array->varying[col][v]);
1602865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1603865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1604865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1605865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* init condition codes */
1606865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[0] = COND_EQ;
1607865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[1] = COND_EQ;
1608865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[2] = COND_EQ;
1609865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->CondCodes[3] = COND_EQ;
1610865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1611865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* init call stack */
1612865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   machine->StackDepth = 0;
1613865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1614865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1615865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1616865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
1617865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1618865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
1619865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
1620865f88afc0d59d886fb2ad50429e584ecf17fa81Brianrun_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1621865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1622865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1623865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   struct fp_machine machine;
1624865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   GLuint i;
1625865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1626865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   CurrentMachine = &machine;
1627865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1628865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (i = start; i < end; i++) {
1629865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (span->array->mask[i]) {
1630865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         init_machine(ctx, &machine, program, span, i);
1631865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1632865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         if (execute_program(ctx, program, ~0, &machine, span, i)) {
1633865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Store result color */
1634865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            COPY_4V(span->array->color.sz4.rgba[i],
1635865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                    machine.Outputs[FRAG_RESULT_COLR]);
1636865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1637865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* Store result depth/z */
1638865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1639865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1640865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               if (depth <= 0.0)
1641865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  span->array->z[i] = 0;
1642865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else if (depth >= 1.0)
1643865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1644865f88afc0d59d886fb2ad50429e584ecf17fa81Brian               else
1645865f88afc0d59d886fb2ad50429e584ecf17fa81Brian                  span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1646865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            }
1647865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
1648865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         else {
1649865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            /* killed fragment */
1650865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            span->array->mask[i] = GL_FALSE;
1651865f88afc0d59d886fb2ad50429e584ecf17fa81Brian            span->writeAll = GL_FALSE;
1652865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         }
1653865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
1654865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1655865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1656865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   CurrentMachine = NULL;
1657865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1658865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1659865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1660865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/**
1661865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Execute the current fragment program for all the fragments
1662865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * in the given span.
1663865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */
1664865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvoid
1665865f88afc0d59d886fb2ad50429e584ecf17fa81Brian_swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1666865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1667865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1668865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1669865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* incoming colors should be floats */
1670865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ASSERT(span->array->ChanType == GL_FLOAT);
1671865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1672865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1673865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1674865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   run_program(ctx, span, 0, span->end);
1675865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1676865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1677865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      span->interpMask &= ~SPAN_Z;
1678865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      span->arrayMask |= SPAN_Z;
1679865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1680865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1681865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ctx->_CurrentProgram = 0;
1682865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
1683865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
1684