1/*
2 * Mesa 3-D graphics library
3 * Version:  7.5
4 *
5 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6 * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
27#include "main/glheader.h"
28#include "main/context.h"
29#include "main/colormac.h"
30#include "main/imports.h"
31#include "main/pixeltransfer.h"
32#include "main/samplerobj.h"
33#include "program/prog_instruction.h"
34
35#include "s_context.h"
36#include "s_texcombine.h"
37
38
/**
 * Pointer to array of float[4].
 *
 * A value of this type points at the first element of a run of
 * four-component float vectors, so a[i][c] addresses component c of
 * element i.  In this file the components are indexed with
 * RCOMP/GCOMP/BCOMP/ACOMP and hold colors or texels.
 *
 * This type makes the code below more concise and avoids a lot of casting.
 */
typedef float (*float4_array)[4];
44
45
46/**
47 * Return array of texels for given unit.
48 */
49static inline float4_array
50get_texel_array(SWcontext *swrast, GLuint unit)
51{
52#ifdef _OPENMP
53   return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
54#else
55   return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
56#endif
57}
58
59
60
61/**
62 * Do texture application for:
63 *  GL_EXT_texture_env_combine
64 *  GL_ARB_texture_env_combine
65 *  GL_EXT_texture_env_dot3
66 *  GL_ARB_texture_env_dot3
67 *  GL_ATI_texture_env_combine3
68 *  GL_NV_texture_env_combine4
69 *  conventional GL texture env modes
70 *
71 * \param ctx          rendering context
72 * \param unit         the texture combiner unit
73 * \param primary_rgba incoming fragment color array
74 * \param texelBuffer  pointer to texel colors for all texture units
75 *
76 * \param span         two fields are used in this function:
77 *                       span->end: number of fragments to process
78 *                       span->array->rgba: incoming/result fragment colors
79 */
80static void
81texture_combine( struct gl_context *ctx, GLuint unit,
82                 const float4_array primary_rgba,
83                 const GLfloat *texelBuffer,
84                 SWspan *span )
85{
86   SWcontext *swrast = SWRAST_CONTEXT(ctx);
87   const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
88   const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
89   float4_array argRGB[MAX_COMBINER_TERMS];
90   float4_array argA[MAX_COMBINER_TERMS];
91   const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
92   const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
93   const GLuint numArgsRGB = combine->_NumArgsRGB;
94   const GLuint numArgsA = combine->_NumArgsA;
95   float4_array ccolor[4], rgba;
96   GLuint i, term;
97   GLuint n = span->end;
98   GLchan (*rgbaChan)[4] = span->array->rgba;
99
100   /* alloc temp pixel buffers */
101   rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
102   if (!rgba) {
103      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
104      return;
105   }
106
107   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
108      ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
109      if (!ccolor[i]) {
110         while (i) {
111            free(ccolor[i]);
112            i--;
113         }
114         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
115         free(rgba);
116         return;
117      }
118   }
119
120   for (i = 0; i < n; i++) {
121      rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
122      rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
123      rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
124      rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
125   }
126
127   /*
128   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
129          combine->ModeRGB,
130          combine->ModeA,
131          combine->SourceRGB[0],
132          combine->SourceA[0],
133          combine->SourceRGB[1],
134          combine->SourceA[1]);
135   */
136
137   /*
138    * Do operand setup for up to 4 operands.  Loop over the terms.
139    */
140   for (term = 0; term < numArgsRGB; term++) {
141      const GLenum srcRGB = combine->SourceRGB[term];
142      const GLenum operandRGB = combine->OperandRGB[term];
143
144      switch (srcRGB) {
145         case GL_TEXTURE:
146            argRGB[term] = get_texel_array(swrast, unit);
147            break;
148         case GL_PRIMARY_COLOR:
149            argRGB[term] = primary_rgba;
150            break;
151         case GL_PREVIOUS:
152            argRGB[term] = rgba;
153            break;
154         case GL_CONSTANT:
155            {
156               float4_array c = ccolor[term];
157               GLfloat red   = textureUnit->EnvColor[0];
158               GLfloat green = textureUnit->EnvColor[1];
159               GLfloat blue  = textureUnit->EnvColor[2];
160               GLfloat alpha = textureUnit->EnvColor[3];
161               for (i = 0; i < n; i++) {
162                  ASSIGN_4V(c[i], red, green, blue, alpha);
163               }
164               argRGB[term] = ccolor[term];
165            }
166            break;
167	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
168	  */
169	 case GL_ZERO:
170            {
171               float4_array c = ccolor[term];
172               for (i = 0; i < n; i++) {
173                  ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
174               }
175               argRGB[term] = ccolor[term];
176            }
177            break;
178	 case GL_ONE:
179            {
180               float4_array c = ccolor[term];
181               for (i = 0; i < n; i++) {
182                  ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
183               }
184               argRGB[term] = ccolor[term];
185            }
186            break;
187         default:
188            /* ARB_texture_env_crossbar source */
189            {
190               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
191               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
192               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
193                  goto end;
194               argRGB[term] = get_texel_array(swrast, srcUnit);
195            }
196      }
197
198      if (operandRGB != GL_SRC_COLOR) {
199         float4_array src = argRGB[term];
200         float4_array dst = ccolor[term];
201
202         /* point to new arg[term] storage */
203         argRGB[term] = ccolor[term];
204
205         switch (operandRGB) {
206         case GL_ONE_MINUS_SRC_COLOR:
207            for (i = 0; i < n; i++) {
208               dst[i][RCOMP] = 1.0F - src[i][RCOMP];
209               dst[i][GCOMP] = 1.0F - src[i][GCOMP];
210               dst[i][BCOMP] = 1.0F - src[i][BCOMP];
211            }
212            break;
213         case GL_SRC_ALPHA:
214            for (i = 0; i < n; i++) {
215               dst[i][RCOMP] =
216               dst[i][GCOMP] =
217               dst[i][BCOMP] = src[i][ACOMP];
218            }
219            break;
220         case GL_ONE_MINUS_SRC_ALPHA:
221            for (i = 0; i < n; i++) {
222               dst[i][RCOMP] =
223               dst[i][GCOMP] =
224               dst[i][BCOMP] = 1.0F - src[i][ACOMP];
225            }
226            break;
227         default:
228            _mesa_problem(ctx, "Bad operandRGB");
229         }
230      }
231   }
232
233   /*
234    * Set up the argA[term] pointers
235    */
236   for (term = 0; term < numArgsA; term++) {
237      const GLenum srcA = combine->SourceA[term];
238      const GLenum operandA = combine->OperandA[term];
239
240      switch (srcA) {
241         case GL_TEXTURE:
242            argA[term] = get_texel_array(swrast, unit);
243            break;
244         case GL_PRIMARY_COLOR:
245            argA[term] = primary_rgba;
246            break;
247         case GL_PREVIOUS:
248            argA[term] = rgba;
249            break;
250         case GL_CONSTANT:
251            {
252               float4_array c = ccolor[term];
253               GLfloat alpha = textureUnit->EnvColor[3];
254               for (i = 0; i < n; i++)
255                  c[i][ACOMP] = alpha;
256               argA[term] = ccolor[term];
257            }
258            break;
259	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
260	  */
261	 case GL_ZERO:
262            {
263               float4_array c = ccolor[term];
264               for (i = 0; i < n; i++)
265                  c[i][ACOMP] = 0.0F;
266               argA[term] = ccolor[term];
267            }
268            break;
269	 case GL_ONE:
270            {
271               float4_array c = ccolor[term];
272               for (i = 0; i < n; i++)
273                  c[i][ACOMP] = 1.0F;
274               argA[term] = ccolor[term];
275            }
276            break;
277         default:
278            /* ARB_texture_env_crossbar source */
279            {
280               const GLuint srcUnit = srcA - GL_TEXTURE0;
281               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
282               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
283                  goto end;
284               argA[term] = get_texel_array(swrast, srcUnit);
285            }
286      }
287
288      if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
289         float4_array src = argA[term];
290         float4_array dst = ccolor[term];
291         argA[term] = ccolor[term];
292         for (i = 0; i < n; i++) {
293            dst[i][ACOMP] = 1.0F - src[i][ACOMP];
294         }
295      }
296   }
297
298   /* RGB channel combine */
299   {
300      float4_array arg0 = argRGB[0];
301      float4_array arg1 = argRGB[1];
302      float4_array arg2 = argRGB[2];
303      float4_array arg3 = argRGB[3];
304
305      switch (combine->ModeRGB) {
306      case GL_REPLACE:
307         for (i = 0; i < n; i++) {
308            rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
309            rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
310            rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
311         }
312         break;
313      case GL_MODULATE:
314         for (i = 0; i < n; i++) {
315            rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
316            rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
317            rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
318         }
319         break;
320      case GL_ADD:
321         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
322            /* (a * b) + (c * d) */
323            for (i = 0; i < n; i++) {
324               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
325                                 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
326               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
327                                 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
328               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
329                                 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
330            }
331         }
332         else {
333            /* 2-term addition */
334            for (i = 0; i < n; i++) {
335               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
336               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
337               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
338            }
339         }
340         break;
341      case GL_ADD_SIGNED:
342         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
343            /* (a * b) + (c * d) - 0.5 */
344            for (i = 0; i < n; i++) {
345               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
346                                 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
347               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
348                                 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
349               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
350                                 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
351            }
352         }
353         else {
354            for (i = 0; i < n; i++) {
355               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
356               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
357               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
358            }
359         }
360         break;
361      case GL_INTERPOLATE:
362         for (i = 0; i < n; i++) {
363            rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
364                          arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
365            rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
366                          arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
367            rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
368                          arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
369         }
370         break;
371      case GL_SUBTRACT:
372         for (i = 0; i < n; i++) {
373            rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
374            rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
375            rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
376         }
377         break;
378      case GL_DOT3_RGB_EXT:
379      case GL_DOT3_RGBA_EXT:
380         /* Do not scale the result by 1 2 or 4 */
381         for (i = 0; i < n; i++) {
382            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
383                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
384                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
385               * 4.0F;
386            dot = CLAMP(dot, 0.0F, 1.0F);
387            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
388         }
389         break;
390      case GL_DOT3_RGB:
391      case GL_DOT3_RGBA:
392         /* DO scale the result by 1 2 or 4 */
393         for (i = 0; i < n; i++) {
394            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
395                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
396                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
397               * 4.0F * scaleRGB;
398            dot = CLAMP(dot, 0.0F, 1.0F);
399            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
400         }
401         break;
402      case GL_MODULATE_ADD_ATI:
403         for (i = 0; i < n; i++) {
404            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
405                              arg1[i][RCOMP]) * scaleRGB;
406            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
407                              arg1[i][GCOMP]) * scaleRGB;
408            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
409                              arg1[i][BCOMP]) * scaleRGB;
410	 }
411         break;
412      case GL_MODULATE_SIGNED_ADD_ATI:
413         for (i = 0; i < n; i++) {
414            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
415                              arg1[i][RCOMP] - 0.5F) * scaleRGB;
416            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
417                              arg1[i][GCOMP] - 0.5F) * scaleRGB;
418            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
419                              arg1[i][BCOMP] - 0.5F) * scaleRGB;
420	 }
421         break;
422      case GL_MODULATE_SUBTRACT_ATI:
423         for (i = 0; i < n; i++) {
424            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
425                              arg1[i][RCOMP]) * scaleRGB;
426            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
427                              arg1[i][GCOMP]) * scaleRGB;
428            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
429                              arg1[i][BCOMP]) * scaleRGB;
430	 }
431         break;
432      case GL_BUMP_ENVMAP_ATI:
433         /* this produces a fixed rgba color, and the coord calc is done elsewhere */
434         for (i = 0; i < n; i++) {
435            /* rgba result is 0,0,0,1 */
436            rgba[i][RCOMP] = 0.0;
437            rgba[i][GCOMP] = 0.0;
438            rgba[i][BCOMP] = 0.0;
439            rgba[i][ACOMP] = 1.0;
440	 }
441         goto end; /* no alpha processing */
442      default:
443         _mesa_problem(ctx, "invalid combine mode");
444      }
445   }
446
447   /* Alpha channel combine */
448   {
449      float4_array arg0 = argA[0];
450      float4_array arg1 = argA[1];
451      float4_array arg2 = argA[2];
452      float4_array arg3 = argA[3];
453
454      switch (combine->ModeA) {
455      case GL_REPLACE:
456         for (i = 0; i < n; i++) {
457            rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
458         }
459         break;
460      case GL_MODULATE:
461         for (i = 0; i < n; i++) {
462            rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
463         }
464         break;
465      case GL_ADD:
466         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
467            /* (a * b) + (c * d) */
468            for (i = 0; i < n; i++) {
469               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
470                                 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
471            }
472         }
473         else {
474            /* two-term add */
475            for (i = 0; i < n; i++) {
476               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
477            }
478         }
479         break;
480      case GL_ADD_SIGNED:
481         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
482            /* (a * b) + (c * d) - 0.5 */
483            for (i = 0; i < n; i++) {
484               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
485                                 arg2[i][ACOMP] * arg3[i][ACOMP] -
486                                 0.5F) * scaleA;
487            }
488         }
489         else {
490            /* a + b - 0.5 */
491            for (i = 0; i < n; i++) {
492               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
493            }
494         }
495         break;
496      case GL_INTERPOLATE:
497         for (i = 0; i < n; i++) {
498            rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
499                              arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
500               * scaleA;
501         }
502         break;
503      case GL_SUBTRACT:
504         for (i = 0; i < n; i++) {
505            rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
506         }
507         break;
508      case GL_MODULATE_ADD_ATI:
509         for (i = 0; i < n; i++) {
510            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
511                              + arg1[i][ACOMP]) * scaleA;
512         }
513         break;
514      case GL_MODULATE_SIGNED_ADD_ATI:
515         for (i = 0; i < n; i++) {
516            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
517                              arg1[i][ACOMP] - 0.5F) * scaleA;
518         }
519         break;
520      case GL_MODULATE_SUBTRACT_ATI:
521         for (i = 0; i < n; i++) {
522            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
523                              - arg1[i][ACOMP]) * scaleA;
524         }
525         break;
526      default:
527         _mesa_problem(ctx, "invalid combine mode");
528      }
529   }
530
531   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
532    * This is kind of a kludge.  It would have been better if the spec
533    * were written such that the GL_COMBINE_ALPHA value could be set to
534    * GL_DOT3.
535    */
536   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
537       combine->ModeRGB == GL_DOT3_RGBA) {
538      for (i = 0; i < n; i++) {
539	 rgba[i][ACOMP] = rgba[i][RCOMP];
540      }
541   }
542
543   for (i = 0; i < n; i++) {
544      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
545      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
546      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
547      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
548   }
549   /* The span->array->rgba values are of CHAN type so set
550    * span->array->ChanType field accordingly.
551    */
552   span->array->ChanType = CHAN_TYPE;
553
554end:
555   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
556      free(ccolor[i]);
557   }
558   free(rgba);
559}
560
561
562/**
563 * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
564 * See GL_EXT_texture_swizzle.
565 */
566static void
567swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
568{
569   const GLuint swzR = GET_SWZ(swizzle, 0);
570   const GLuint swzG = GET_SWZ(swizzle, 1);
571   const GLuint swzB = GET_SWZ(swizzle, 2);
572   const GLuint swzA = GET_SWZ(swizzle, 3);
573   GLfloat vector[6];
574   GLuint i;
575
576   vector[SWIZZLE_ZERO] = 0;
577   vector[SWIZZLE_ONE] = 1.0F;
578
579   for (i = 0; i < count; i++) {
580      vector[SWIZZLE_X] = texels[i][0];
581      vector[SWIZZLE_Y] = texels[i][1];
582      vector[SWIZZLE_Z] = texels[i][2];
583      vector[SWIZZLE_W] = texels[i][3];
584      texels[i][RCOMP] = vector[swzR];
585      texels[i][GCOMP] = vector[swzG];
586      texels[i][BCOMP] = vector[swzB];
587      texels[i][ACOMP] = vector[swzA];
588   }
589}
590
591
592/**
593 * Apply texture mapping to a span of fragments.
594 */
595void
596_swrast_texture_span( struct gl_context *ctx, SWspan *span )
597{
598   SWcontext *swrast = SWRAST_CONTEXT(ctx);
599   float4_array primary_rgba;
600   GLuint unit;
601
602   if (!swrast->TexelBuffer) {
603#ifdef _OPENMP
604      const GLint maxThreads = omp_get_max_threads();
605#else
606      const GLint maxThreads = 1;
607#endif
608
609      /* TexelBuffer is also global and normally shared by all SWspan
610       * instances; when running with multiple threads, create one per
611       * thread.
612       */
613      swrast->TexelBuffer =
614	 (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
615			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
616      if (!swrast->TexelBuffer) {
617	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
618	 return;
619      }
620   }
621
622   primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
623
624   if (!primary_rgba) {
625      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
626      return;
627   }
628
629   ASSERT(span->end <= SWRAST_MAX_WIDTH);
630
631   /*
632    * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
633    */
634   if (swrast->_TextureCombinePrimary) {
635      GLuint i;
636      for (i = 0; i < span->end; i++) {
637         primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
638         primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
639         primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
640         primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
641      }
642   }
643
644   /* First must sample all bump maps */
645   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
646      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
647
648      if (texUnit->_ReallyEnabled &&
649         texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
650         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
651            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
652         float4_array targetcoords =
653            span->array->attribs[FRAG_ATTRIB_TEX0 +
654               ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
655
656         const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
657         GLfloat *lambda = span->array->lambda[unit];
658         float4_array texels = get_texel_array(swrast, unit);
659         GLuint i;
660         GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
661         GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
662         GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
663         GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
664
665         /* adjust texture lod (lambda) */
666         if (span->arrayMask & SPAN_LAMBDA) {
667            if (texUnit->LodBias + samp->LodBias != 0.0F) {
668               /* apply LOD bias, but don't clamp yet */
669               const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
670                                          -ctx->Const.MaxTextureLodBias,
671                                          ctx->Const.MaxTextureLodBias);
672               GLuint i;
673               for (i = 0; i < span->end; i++) {
674                  lambda[i] += bias;
675               }
676            }
677
678            if (samp->MinLod != -1000.0 ||
679                samp->MaxLod != 1000.0) {
680               /* apply LOD clamping to lambda */
681               const GLfloat min = samp->MinLod;
682               const GLfloat max = samp->MaxLod;
683               GLuint i;
684               for (i = 0; i < span->end; i++) {
685                  GLfloat l = lambda[i];
686                  lambda[i] = CLAMP(l, min, max);
687               }
688            }
689         }
690
691         /* Sample the texture (span->end = number of fragments) */
692         swrast->TextureSample[unit]( ctx, samp,
693                                      ctx->Texture.Unit[unit]._Current,
694                                      span->end, texcoords, lambda, texels );
695
696         /* manipulate the span values of the bump target
697            not sure this can work correctly even ignoring
698            the problem that channel is unsigned */
699         for (i = 0; i < span->end; i++) {
700            targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
701                                  rotMatrix01) / targetcoords[i][3];
702            targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
703                                  rotMatrix11) / targetcoords[i][3];
704         }
705      }
706   }
707
708   /*
709    * Must do all texture sampling before combining in order to
710    * accomodate GL_ARB_texture_env_crossbar.
711    */
712   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
713      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
714      if (texUnit->_ReallyEnabled &&
715          texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
716         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
717            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
718         const struct gl_texture_object *curObj = texUnit->_Current;
719         const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
720         GLfloat *lambda = span->array->lambda[unit];
721         float4_array texels = get_texel_array(swrast, unit);
722
723         /* adjust texture lod (lambda) */
724         if (span->arrayMask & SPAN_LAMBDA) {
725            if (texUnit->LodBias + samp->LodBias != 0.0F) {
726               /* apply LOD bias, but don't clamp yet */
727               const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
728                                          -ctx->Const.MaxTextureLodBias,
729                                          ctx->Const.MaxTextureLodBias);
730               GLuint i;
731               for (i = 0; i < span->end; i++) {
732                  lambda[i] += bias;
733               }
734            }
735
736            if (samp->MinLod != -1000.0 ||
737                samp->MaxLod != 1000.0) {
738               /* apply LOD clamping to lambda */
739               const GLfloat min = samp->MinLod;
740               const GLfloat max = samp->MaxLod;
741               GLuint i;
742               for (i = 0; i < span->end; i++) {
743                  GLfloat l = lambda[i];
744                  lambda[i] = CLAMP(l, min, max);
745               }
746            }
747         }
748         else if (samp->MaxAnisotropy > 1.0 &&
749                  samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
750            /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
751             * it requires the current SWspan *span as an additional parameter.
752             * In order to keep the same function signature, the unused lambda
753             * parameter will be modified to actually contain the SWspan pointer.
754             * This is a Hack. To make it right, the texture_sample_func
755             * signature and all implementing functions need to be modified.
756             */
757            /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
758            lambda = (GLfloat *)span;
759         }
760
761         /* Sample the texture (span->end = number of fragments) */
762         swrast->TextureSample[unit]( ctx, samp,
763                                      ctx->Texture.Unit[unit]._Current,
764                                      span->end, texcoords, lambda, texels );
765
766         /* GL_EXT_texture_swizzle */
767         if (curObj->_Swizzle != SWIZZLE_NOOP) {
768            swizzle_texels(curObj->_Swizzle, span->end, texels);
769         }
770      }
771   }
772
773   /*
774    * OK, now apply the texture (aka texture combine/blend).
775    * We modify the span->color.rgba values.
776    */
777   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
778      if (ctx->Texture.Unit[unit]._ReallyEnabled)
779         texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
780   }
781
782   free(primary_rgba);
783}
784