s_texcombine.c revision 617cdcd4c7b1cffb584c829c35bdf9c9bf04627b
1/*
2 * Mesa 3-D graphics library
3 * Version:  7.5
4 *
5 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6 * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
27#include "main/glheader.h"
28#include "main/context.h"
29#include "main/colormac.h"
30#include "main/imports.h"
31#include "main/pixeltransfer.h"
32#include "program/prog_instruction.h"
33
34#include "s_context.h"
35#include "s_texcombine.h"
36
37
/**
 * One group of four floats (a single RGBA color or texel).
 * Only used to spell out the pointer type below.
 */
typedef float float4_elem[4];

/**
 * Pointer to the first element of an array of float[4] groups.
 * Indexing as p[i][c] selects component c of the i-th group without
 * any casting at the point of use.
 */
typedef float4_elem *float4_array;
43
44
45/**
46 * Return array of texels for given unit.
47 */
48static inline float4_array
49get_texel_array(SWcontext *swrast, GLuint unit)
50{
51#ifdef _OPENMP
52   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
53#else
54   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
55#endif
56}
57
58
59
60/**
61 * Do texture application for:
62 *  GL_EXT_texture_env_combine
63 *  GL_ARB_texture_env_combine
64 *  GL_EXT_texture_env_dot3
65 *  GL_ARB_texture_env_dot3
66 *  GL_ATI_texture_env_combine3
67 *  GL_NV_texture_env_combine4
68 *  conventional GL texture env modes
69 *
70 * \param ctx          rendering context
71 * \param unit         the texture combiner unit
72 * \param n            number of fragments to process (span width)
73 * \param primary_rgba incoming fragment color array
74 * \param texelBuffer  pointer to texel colors for all texture units
75 *
76 * \param rgba         incoming/result fragment colors
77 */
78static void
79texture_combine( struct gl_context *ctx, GLuint unit, GLuint n,
80                 const float4_array primary_rgba,
81                 const GLfloat *texelBuffer,
82                 GLchan (*rgbaChan)[4] )
83{
84   SWcontext *swrast = SWRAST_CONTEXT(ctx);
85   const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
86   const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
87   float4_array argRGB[MAX_COMBINER_TERMS];
88   float4_array argA[MAX_COMBINER_TERMS];
89   const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
90   const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
91   const GLuint numArgsRGB = combine->_NumArgsRGB;
92   const GLuint numArgsA = combine->_NumArgsA;
93   float4_array ccolor[4], rgba;
94   GLuint i, term;
95
96   if (!swrast->TexelBuffer) {
97#ifdef _OPENMP
98      const GLint maxThreads = omp_get_max_threads();
99#else
100      const GLint maxThreads = 1;
101#endif
102
103      /* TexelBuffer is also global and normally shared by all SWspan
104       * instances; when running with multiple threads, create one per
105       * thread.
106       */
107      swrast->TexelBuffer =
108	 (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
109			    MAX_WIDTH * 4 * sizeof(GLfloat));
110      if (!swrast->TexelBuffer) {
111	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
112	 return;
113      }
114   }
115
116   /* alloc temp pixel buffers */
117   rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
118   if (!rgba) {
119      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
120      return;
121   }
122
123   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
124      ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
125      if (!ccolor[i]) {
126         while (i) {
127            free(ccolor[i]);
128            i--;
129         }
130         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
131         return;
132      }
133   }
134
135   for (i = 0; i < n; i++) {
136      rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
137      rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
138      rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
139      rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
140   }
141
142   /*
143   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
144          combine->ModeRGB,
145          combine->ModeA,
146          combine->SourceRGB[0],
147          combine->SourceA[0],
148          combine->SourceRGB[1],
149          combine->SourceA[1]);
150   */
151
152   /*
153    * Do operand setup for up to 4 operands.  Loop over the terms.
154    */
155   for (term = 0; term < numArgsRGB; term++) {
156      const GLenum srcRGB = combine->SourceRGB[term];
157      const GLenum operandRGB = combine->OperandRGB[term];
158
159      switch (srcRGB) {
160         case GL_TEXTURE:
161            argRGB[term] = get_texel_array(swrast, unit);
162            break;
163         case GL_PRIMARY_COLOR:
164            argRGB[term] = primary_rgba;
165            break;
166         case GL_PREVIOUS:
167            argRGB[term] = rgba;
168            break;
169         case GL_CONSTANT:
170            {
171               float4_array c = ccolor[term];
172               GLfloat red   = textureUnit->EnvColor[0];
173               GLfloat green = textureUnit->EnvColor[1];
174               GLfloat blue  = textureUnit->EnvColor[2];
175               GLfloat alpha = textureUnit->EnvColor[3];
176               for (i = 0; i < n; i++) {
177                  ASSIGN_4V(c[i], red, green, blue, alpha);
178               }
179               argRGB[term] = ccolor[term];
180            }
181            break;
182	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
183	  */
184	 case GL_ZERO:
185            {
186               float4_array c = ccolor[term];
187               for (i = 0; i < n; i++) {
188                  ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
189               }
190               argRGB[term] = ccolor[term];
191            }
192            break;
193	 case GL_ONE:
194            {
195               float4_array c = ccolor[term];
196               for (i = 0; i < n; i++) {
197                  ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
198               }
199               argRGB[term] = ccolor[term];
200            }
201            break;
202         default:
203            /* ARB_texture_env_crossbar source */
204            {
205               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
206               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
207               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
208                  goto end;
209               argRGB[term] = get_texel_array(swrast, srcUnit);
210            }
211      }
212
213      if (operandRGB != GL_SRC_COLOR) {
214         float4_array src = argRGB[term];
215         float4_array dst = ccolor[term];
216
217         /* point to new arg[term] storage */
218         argRGB[term] = ccolor[term];
219
220         switch (operandRGB) {
221         case GL_ONE_MINUS_SRC_COLOR:
222            for (i = 0; i < n; i++) {
223               dst[i][RCOMP] = 1.0F - src[i][RCOMP];
224               dst[i][GCOMP] = 1.0F - src[i][GCOMP];
225               dst[i][BCOMP] = 1.0F - src[i][BCOMP];
226            }
227            break;
228         case GL_SRC_ALPHA:
229            for (i = 0; i < n; i++) {
230               dst[i][RCOMP] =
231               dst[i][GCOMP] =
232               dst[i][BCOMP] = src[i][ACOMP];
233            }
234            break;
235         case GL_ONE_MINUS_SRC_ALPHA:
236            for (i = 0; i < n; i++) {
237               dst[i][RCOMP] =
238               dst[i][GCOMP] =
239               dst[i][BCOMP] = 1.0F - src[i][ACOMP];
240            }
241            break;
242         default:
243            _mesa_problem(ctx, "Bad operandRGB");
244         }
245      }
246   }
247
248   /*
249    * Set up the argA[term] pointers
250    */
251   for (term = 0; term < numArgsA; term++) {
252      const GLenum srcA = combine->SourceA[term];
253      const GLenum operandA = combine->OperandA[term];
254
255      switch (srcA) {
256         case GL_TEXTURE:
257            argA[term] = get_texel_array(swrast, unit);
258            break;
259         case GL_PRIMARY_COLOR:
260            argA[term] = primary_rgba;
261            break;
262         case GL_PREVIOUS:
263            argA[term] = rgba;
264            break;
265         case GL_CONSTANT:
266            {
267               float4_array c = ccolor[term];
268               GLfloat alpha = textureUnit->EnvColor[3];
269               for (i = 0; i < n; i++)
270                  c[i][ACOMP] = alpha;
271               argA[term] = ccolor[term];
272            }
273            break;
274	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
275	  */
276	 case GL_ZERO:
277            {
278               float4_array c = ccolor[term];
279               for (i = 0; i < n; i++)
280                  c[i][ACOMP] = 0.0F;
281               argA[term] = ccolor[term];
282            }
283            break;
284	 case GL_ONE:
285            {
286               float4_array c = ccolor[term];
287               for (i = 0; i < n; i++)
288                  c[i][ACOMP] = 1.0F;
289               argA[term] = ccolor[term];
290            }
291            break;
292         default:
293            /* ARB_texture_env_crossbar source */
294            {
295               const GLuint srcUnit = srcA - GL_TEXTURE0;
296               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
297               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
298                  goto end;
299               argA[term] = get_texel_array(swrast, srcUnit);
300            }
301      }
302
303      if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
304         float4_array src = argA[term];
305         float4_array dst = ccolor[term];
306         argA[term] = ccolor[term];
307         for (i = 0; i < n; i++) {
308            dst[i][ACOMP] = 1.0F - src[i][ACOMP];
309         }
310      }
311   }
312
313   /* RGB channel combine */
314   {
315      float4_array arg0 = argRGB[0];
316      float4_array arg1 = argRGB[1];
317      float4_array arg2 = argRGB[2];
318      float4_array arg3 = argRGB[3];
319
320      switch (combine->ModeRGB) {
321      case GL_REPLACE:
322         for (i = 0; i < n; i++) {
323            rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
324            rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
325            rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
326         }
327         break;
328      case GL_MODULATE:
329         for (i = 0; i < n; i++) {
330            rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
331            rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
332            rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
333         }
334         break;
335      case GL_ADD:
336         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
337            /* (a * b) + (c * d) */
338            for (i = 0; i < n; i++) {
339               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
340                                 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
341               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
342                                 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
343               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
344                                 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
345            }
346         }
347         else {
348            /* 2-term addition */
349            for (i = 0; i < n; i++) {
350               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
351               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
352               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
353            }
354         }
355         break;
356      case GL_ADD_SIGNED:
357         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
358            /* (a * b) + (c * d) - 0.5 */
359            for (i = 0; i < n; i++) {
360               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
361                                 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
362               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
363                                 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
364               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
365                                 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
366            }
367         }
368         else {
369            for (i = 0; i < n; i++) {
370               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
371               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
372               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
373            }
374         }
375         break;
376      case GL_INTERPOLATE:
377         for (i = 0; i < n; i++) {
378            rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
379                          arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
380            rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
381                          arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
382            rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
383                          arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
384         }
385         break;
386      case GL_SUBTRACT:
387         for (i = 0; i < n; i++) {
388            rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
389            rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
390            rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
391         }
392         break;
393      case GL_DOT3_RGB_EXT:
394      case GL_DOT3_RGBA_EXT:
395         /* Do not scale the result by 1 2 or 4 */
396         for (i = 0; i < n; i++) {
397            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
398                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
399                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
400               * 4.0F;
401            dot = CLAMP(dot, 0.0F, 1.0F);
402            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
403         }
404         break;
405      case GL_DOT3_RGB:
406      case GL_DOT3_RGBA:
407         /* DO scale the result by 1 2 or 4 */
408         for (i = 0; i < n; i++) {
409            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
410                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
411                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
412               * 4.0F * scaleRGB;
413            dot = CLAMP(dot, 0.0F, 1.0F);
414            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
415         }
416         break;
417      case GL_MODULATE_ADD_ATI:
418         for (i = 0; i < n; i++) {
419            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
420                              arg1[i][RCOMP]) * scaleRGB;
421            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
422                              arg1[i][GCOMP]) * scaleRGB;
423            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
424                              arg1[i][BCOMP]) * scaleRGB;
425	 }
426         break;
427      case GL_MODULATE_SIGNED_ADD_ATI:
428         for (i = 0; i < n; i++) {
429            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
430                              arg1[i][RCOMP] - 0.5F) * scaleRGB;
431            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
432                              arg1[i][GCOMP] - 0.5F) * scaleRGB;
433            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
434                              arg1[i][BCOMP] - 0.5F) * scaleRGB;
435	 }
436         break;
437      case GL_MODULATE_SUBTRACT_ATI:
438         for (i = 0; i < n; i++) {
439            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
440                              arg1[i][RCOMP]) * scaleRGB;
441            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
442                              arg1[i][GCOMP]) * scaleRGB;
443            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
444                              arg1[i][BCOMP]) * scaleRGB;
445	 }
446         break;
447      case GL_BUMP_ENVMAP_ATI:
448         /* this produces a fixed rgba color, and the coord calc is done elsewhere */
449         for (i = 0; i < n; i++) {
450            /* rgba result is 0,0,0,1 */
451            rgba[i][RCOMP] = 0.0;
452            rgba[i][GCOMP] = 0.0;
453            rgba[i][BCOMP] = 0.0;
454            rgba[i][ACOMP] = 1.0;
455	 }
456         goto end; /* no alpha processing */
457      default:
458         _mesa_problem(ctx, "invalid combine mode");
459      }
460   }
461
462   /* Alpha channel combine */
463   {
464      float4_array arg0 = argA[0];
465      float4_array arg1 = argA[1];
466      float4_array arg2 = argA[2];
467      float4_array arg3 = argA[3];
468
469      switch (combine->ModeA) {
470      case GL_REPLACE:
471         for (i = 0; i < n; i++) {
472            rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
473         }
474         break;
475      case GL_MODULATE:
476         for (i = 0; i < n; i++) {
477            rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
478         }
479         break;
480      case GL_ADD:
481         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
482            /* (a * b) + (c * d) */
483            for (i = 0; i < n; i++) {
484               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
485                                 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
486            }
487         }
488         else {
489            /* two-term add */
490            for (i = 0; i < n; i++) {
491               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
492            }
493         }
494         break;
495      case GL_ADD_SIGNED:
496         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
497            /* (a * b) + (c * d) - 0.5 */
498            for (i = 0; i < n; i++) {
499               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
500                                 arg2[i][ACOMP] * arg3[i][ACOMP] -
501                                 0.5F) * scaleA;
502            }
503         }
504         else {
505            /* a + b - 0.5 */
506            for (i = 0; i < n; i++) {
507               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
508            }
509         }
510         break;
511      case GL_INTERPOLATE:
512         for (i = 0; i < n; i++) {
513            rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
514                              arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
515               * scaleA;
516         }
517         break;
518      case GL_SUBTRACT:
519         for (i = 0; i < n; i++) {
520            rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
521         }
522         break;
523      case GL_MODULATE_ADD_ATI:
524         for (i = 0; i < n; i++) {
525            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
526                              + arg1[i][ACOMP]) * scaleA;
527         }
528         break;
529      case GL_MODULATE_SIGNED_ADD_ATI:
530         for (i = 0; i < n; i++) {
531            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
532                              arg1[i][ACOMP] - 0.5F) * scaleA;
533         }
534         break;
535      case GL_MODULATE_SUBTRACT_ATI:
536         for (i = 0; i < n; i++) {
537            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
538                              - arg1[i][ACOMP]) * scaleA;
539         }
540         break;
541      default:
542         _mesa_problem(ctx, "invalid combine mode");
543      }
544   }
545
546   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
547    * This is kind of a kludge.  It would have been better if the spec
548    * were written such that the GL_COMBINE_ALPHA value could be set to
549    * GL_DOT3.
550    */
551   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
552       combine->ModeRGB == GL_DOT3_RGBA) {
553      for (i = 0; i < n; i++) {
554	 rgba[i][ACOMP] = rgba[i][RCOMP];
555      }
556   }
557
558   for (i = 0; i < n; i++) {
559      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
560      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
561      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
562      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
563   }
564
565end:
566   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
567      free(ccolor[i]);
568   }
569   free(rgba);
570}
571
572
573/**
574 * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
575 * See GL_EXT_texture_swizzle.
576 */
577static void
578swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
579{
580   const GLuint swzR = GET_SWZ(swizzle, 0);
581   const GLuint swzG = GET_SWZ(swizzle, 1);
582   const GLuint swzB = GET_SWZ(swizzle, 2);
583   const GLuint swzA = GET_SWZ(swizzle, 3);
584   GLfloat vector[6];
585   GLuint i;
586
587   vector[SWIZZLE_ZERO] = 0;
588   vector[SWIZZLE_ONE] = 1.0F;
589
590   for (i = 0; i < count; i++) {
591      vector[SWIZZLE_X] = texels[i][0];
592      vector[SWIZZLE_Y] = texels[i][1];
593      vector[SWIZZLE_Z] = texels[i][2];
594      vector[SWIZZLE_W] = texels[i][3];
595      texels[i][RCOMP] = vector[swzR];
596      texels[i][GCOMP] = vector[swzG];
597      texels[i][BCOMP] = vector[swzB];
598      texels[i][ACOMP] = vector[swzA];
599   }
600}
601
602
603/**
604 * Apply texture mapping to a span of fragments.
605 */
606void
607_swrast_texture_span( struct gl_context *ctx, SWspan *span )
608{
609   SWcontext *swrast = SWRAST_CONTEXT(ctx);
610   float4_array primary_rgba;
611   GLuint unit;
612
613   primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
614
615   if (!primary_rgba) {
616      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
617      return;
618   }
619
620   ASSERT(span->end <= MAX_WIDTH);
621
622   /*
623    * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
624    */
625   if (swrast->_TextureCombinePrimary) {
626      GLuint i;
627      for (i = 0; i < span->end; i++) {
628         primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
629         primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
630         primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
631         primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
632      }
633   }
634
635   /* First must sample all bump maps */
636   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
637      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
638
639      if (texUnit->_ReallyEnabled &&
640         texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
641         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
642            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
643         float4_array targetcoords =
644            span->array->attribs[FRAG_ATTRIB_TEX0 +
645               ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
646
647         const struct gl_texture_object *curObj = texUnit->_Current;
648         GLfloat *lambda = span->array->lambda[unit];
649         float4_array texels = get_texel_array(swrast, unit);
650         GLuint i;
651         GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
652         GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
653         GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
654         GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
655
656         /* adjust texture lod (lambda) */
657         if (span->arrayMask & SPAN_LAMBDA) {
658            if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
659               /* apply LOD bias, but don't clamp yet */
660               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
661                                          -ctx->Const.MaxTextureLodBias,
662                                          ctx->Const.MaxTextureLodBias);
663               GLuint i;
664               for (i = 0; i < span->end; i++) {
665                  lambda[i] += bias;
666               }
667            }
668
669            if (curObj->Sampler.MinLod != -1000.0 ||
670                curObj->Sampler.MaxLod != 1000.0) {
671               /* apply LOD clamping to lambda */
672               const GLfloat min = curObj->Sampler.MinLod;
673               const GLfloat max = curObj->Sampler.MaxLod;
674               GLuint i;
675               for (i = 0; i < span->end; i++) {
676                  GLfloat l = lambda[i];
677                  lambda[i] = CLAMP(l, min, max);
678               }
679            }
680         }
681
682         /* Sample the texture (span->end = number of fragments) */
683         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
684                                      texcoords, lambda, texels );
685
686         /* manipulate the span values of the bump target
687            not sure this can work correctly even ignoring
688            the problem that channel is unsigned */
689         for (i = 0; i < span->end; i++) {
690            targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
691                                  rotMatrix01) / targetcoords[i][3];
692            targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
693                                  rotMatrix11) / targetcoords[i][3];
694         }
695      }
696   }
697
698   /*
699    * Must do all texture sampling before combining in order to
700    * accomodate GL_ARB_texture_env_crossbar.
701    */
702   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
703      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
704      if (texUnit->_ReallyEnabled &&
705          texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
706         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
707            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
708         const struct gl_texture_object *curObj = texUnit->_Current;
709         GLfloat *lambda = span->array->lambda[unit];
710         float4_array texels = get_texel_array(swrast, unit);
711
712         /* adjust texture lod (lambda) */
713         if (span->arrayMask & SPAN_LAMBDA) {
714            if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
715               /* apply LOD bias, but don't clamp yet */
716               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
717                                          -ctx->Const.MaxTextureLodBias,
718                                          ctx->Const.MaxTextureLodBias);
719               GLuint i;
720               for (i = 0; i < span->end; i++) {
721                  lambda[i] += bias;
722               }
723            }
724
725            if (curObj->Sampler.MinLod != -1000.0 ||
726                curObj->Sampler.MaxLod != 1000.0) {
727               /* apply LOD clamping to lambda */
728               const GLfloat min = curObj->Sampler.MinLod;
729               const GLfloat max = curObj->Sampler.MaxLod;
730               GLuint i;
731               for (i = 0; i < span->end; i++) {
732                  GLfloat l = lambda[i];
733                  lambda[i] = CLAMP(l, min, max);
734               }
735            }
736         }
737         else if (curObj->Sampler.MaxAnisotropy > 1.0 &&
738                  curObj->Sampler.MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
739            /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
740             * it requires the current SWspan *span as an additional parameter.
741             * In order to keep the same function signature, the unused lambda
742             * parameter will be modified to actually contain the SWspan pointer.
743             * This is a Hack. To make it right, the texture_sample_func
744             * signature and all implementing functions need to be modified.
745             */
746            /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
747            lambda = (GLfloat *)span;
748         }
749
750         /* Sample the texture (span->end = number of fragments) */
751         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
752                                      texcoords, lambda, texels );
753
754         /* GL_EXT_texture_swizzle */
755         if (curObj->_Swizzle != SWIZZLE_NOOP) {
756            swizzle_texels(curObj->_Swizzle, span->end, texels);
757         }
758      }
759   }
760
761   /*
762    * OK, now apply the texture (aka texture combine/blend).
763    * We modify the span->color.rgba values.
764    */
765   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
766      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
767         texture_combine( ctx, unit, span->end,
768                          primary_rgba,
769                          swrast->TexelBuffer,
770                          span->array->rgba );
771      }
772   }
773
774   free(primary_rgba);
775}
776