1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * quad blending
30 * \author Brian Paul
31 */
32
33#include "pipe/p_defines.h"
34#include "util/u_math.h"
35#include "util/u_memory.h"
36#include "util/u_format.h"
37#include "util/u_dual_blend.h"
38#include "sp_context.h"
39#include "sp_state.h"
40#include "sp_quad.h"
41#include "sp_tile_cache.h"
42#include "sp_quad_pipe.h"
43
44
/**
 * Base layout of a color buffer, used by rebase_colors() to decide which
 * channels of a quad must be replicated or forced to 1 before storing.
 */
enum format
{
   RGBA            = 0,  /**< all four channels are kept as-is */
   RGB             = 1,  /**< alpha is forced to 1 */
   LUMINANCE       = 2,  /**< G and B mirror R, alpha is forced to 1 */
   LUMINANCE_ALPHA = 3,  /**< G and B mirror R, alpha is kept */
   INTENSITY       = 4   /**< G, B and A all mirror R */
};
53
54
55/** Subclass of quad_stage */
56struct blend_quad_stage
57{
58   struct quad_stage base;
59   boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
60   enum format base_format[PIPE_MAX_COLOR_BUFS];
61   enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
62};
63
64
65/** cast wrapper */
66static INLINE struct blend_quad_stage *
67blend_quad_stage(struct quad_stage *stage)
68{
69   return (struct blend_quad_stage *) stage;
70}
71
72
/*
 * Component-wise helpers for 4-element vectors (anything indexable with
 * [0]..[3]).  R/DST is the result, A/B/SRC are the operands.
 *
 * Every argument is parenthesized so that expressions such as "p + 4" can
 * be passed safely.  Arguments are still expanded once per component, so
 * they must not have side effects.
 */

/** DST = SRC */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while (0)

/** Broadcast the scalar SRC into all four components of DST */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while (0)

/** R = A + B */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)

/** R = A - B */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)

/** R = A * B */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)

/** R = min(A, B), component-wise */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)

/** R = max(A, B), component-wise */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
146
147
148
149static void
150logicop_quad(struct quad_stage *qs,
151             float (*quadColor)[4],
152             float (*dest)[4])
153{
154   struct softpipe_context *softpipe = qs->softpipe;
155   ubyte src[4][4], dst[4][4], res[4][4];
156   uint *src4 = (uint *) src;
157   uint *dst4 = (uint *) dst;
158   uint *res4 = (uint *) res;
159   uint j;
160
161
162   /* convert to ubyte */
163   for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
164      dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
165      dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
166      dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
167      dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
168
169      src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
170      src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
171      src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
172      src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
173   }
174
175   switch (softpipe->blend->logicop_func) {
176   case PIPE_LOGICOP_CLEAR:
177      for (j = 0; j < 4; j++)
178         res4[j] = 0;
179      break;
180   case PIPE_LOGICOP_NOR:
181      for (j = 0; j < 4; j++)
182         res4[j] = ~(src4[j] | dst4[j]);
183      break;
184   case PIPE_LOGICOP_AND_INVERTED:
185      for (j = 0; j < 4; j++)
186         res4[j] = ~src4[j] & dst4[j];
187      break;
188   case PIPE_LOGICOP_COPY_INVERTED:
189      for (j = 0; j < 4; j++)
190         res4[j] = ~src4[j];
191      break;
192   case PIPE_LOGICOP_AND_REVERSE:
193      for (j = 0; j < 4; j++)
194         res4[j] = src4[j] & ~dst4[j];
195      break;
196   case PIPE_LOGICOP_INVERT:
197      for (j = 0; j < 4; j++)
198         res4[j] = ~dst4[j];
199      break;
200   case PIPE_LOGICOP_XOR:
201      for (j = 0; j < 4; j++)
202         res4[j] = dst4[j] ^ src4[j];
203      break;
204   case PIPE_LOGICOP_NAND:
205      for (j = 0; j < 4; j++)
206         res4[j] = ~(src4[j] & dst4[j]);
207      break;
208   case PIPE_LOGICOP_AND:
209      for (j = 0; j < 4; j++)
210         res4[j] = src4[j] & dst4[j];
211      break;
212   case PIPE_LOGICOP_EQUIV:
213      for (j = 0; j < 4; j++)
214         res4[j] = ~(src4[j] ^ dst4[j]);
215      break;
216   case PIPE_LOGICOP_NOOP:
217      for (j = 0; j < 4; j++)
218         res4[j] = dst4[j];
219      break;
220   case PIPE_LOGICOP_OR_INVERTED:
221      for (j = 0; j < 4; j++)
222         res4[j] = ~src4[j] | dst4[j];
223      break;
224   case PIPE_LOGICOP_COPY:
225      for (j = 0; j < 4; j++)
226         res4[j] = src4[j];
227      break;
228   case PIPE_LOGICOP_OR_REVERSE:
229      for (j = 0; j < 4; j++)
230         res4[j] = src4[j] | ~dst4[j];
231      break;
232   case PIPE_LOGICOP_OR:
233      for (j = 0; j < 4; j++)
234         res4[j] = src4[j] | dst4[j];
235      break;
236   case PIPE_LOGICOP_SET:
237      for (j = 0; j < 4; j++)
238         res4[j] = ~0;
239      break;
240   default:
241      assert(0 && "invalid logicop mode");
242   }
243
244   for (j = 0; j < 4; j++) {
245      quadColor[j][0] = ubyte_to_float(res[j][0]);
246      quadColor[j][1] = ubyte_to_float(res[j][1]);
247      quadColor[j][2] = ubyte_to_float(res[j][2]);
248      quadColor[j][3] = ubyte_to_float(res[j][3]);
249   }
250}
251
252
253
254/**
255 * Do blending for a 2x2 quad for one color buffer.
256 * \param quadColor  the incoming quad colors
257 * \param dest  the destination/framebuffer quad colors
258 * \param const_blend_color  the constant blend color
259 * \param blend_index  which set of blending terms to use
260 */
261static void
262blend_quad(struct quad_stage *qs,
263           float (*quadColor)[4],
264           float (*quadColor2)[4],
265           float (*dest)[4],
266           const float const_blend_color[4],
267           unsigned blend_index)
268{
269   static const float zero[4] = { 0, 0, 0, 0 };
270   static const float one[4] = { 1, 1, 1, 1 };
271   struct softpipe_context *softpipe = qs->softpipe;
272   float source[4][TGSI_QUAD_SIZE] = { { 0 } };
273   float blend_dest[4][TGSI_QUAD_SIZE];
274
275   /*
276    * Compute src/first term RGB
277    */
278   switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
279   case PIPE_BLENDFACTOR_ONE:
280      VEC4_COPY(source[0], quadColor[0]); /* R */
281      VEC4_COPY(source[1], quadColor[1]); /* G */
282      VEC4_COPY(source[2], quadColor[2]); /* B */
283      break;
284   case PIPE_BLENDFACTOR_SRC_COLOR:
285      VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
286      VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
287      VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
288      break;
289   case PIPE_BLENDFACTOR_SRC_ALPHA:
290      {
291         const float *alpha = quadColor[3];
292         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
293         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
294         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
295      }
296      break;
297   case PIPE_BLENDFACTOR_DST_COLOR:
298      VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
299      VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
300      VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
301      break;
302   case PIPE_BLENDFACTOR_DST_ALPHA:
303      {
304         const float *alpha = dest[3];
305         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
306         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
307         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
308      }
309      break;
310   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
311      {
312         const float *alpha = quadColor[3];
313         float diff[4], temp[4];
314         VEC4_SUB(diff, one, dest[3]);
315         VEC4_MIN(temp, alpha, diff);
316         VEC4_MUL(source[0], quadColor[0], temp); /* R */
317         VEC4_MUL(source[1], quadColor[1], temp); /* G */
318         VEC4_MUL(source[2], quadColor[2], temp); /* B */
319      }
320      break;
321   case PIPE_BLENDFACTOR_CONST_COLOR:
322      {
323         float comp[4];
324         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
325         VEC4_MUL(source[0], quadColor[0], comp); /* R */
326         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
327         VEC4_MUL(source[1], quadColor[1], comp); /* G */
328         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
329         VEC4_MUL(source[2], quadColor[2], comp); /* B */
330      }
331      break;
332   case PIPE_BLENDFACTOR_CONST_ALPHA:
333      {
334         float alpha[4];
335         VEC4_SCALAR(alpha, const_blend_color[3]);
336         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
337         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
338         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
339      }
340      break;
341   case PIPE_BLENDFACTOR_SRC1_COLOR:
342      VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
343      VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
344      VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
345      break;
346   case PIPE_BLENDFACTOR_SRC1_ALPHA:
347      {
348         const float *alpha = quadColor2[3];
349         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
350         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
351         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
352      }
353      break;
354   case PIPE_BLENDFACTOR_ZERO:
355      VEC4_COPY(source[0], zero); /* R */
356      VEC4_COPY(source[1], zero); /* G */
357      VEC4_COPY(source[2], zero); /* B */
358      break;
359   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
360      {
361         float inv_comp[4];
362         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
363         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
364         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
365         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
366         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
367         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
368      }
369      break;
370   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
371      {
372         float inv_alpha[4];
373         VEC4_SUB(inv_alpha, one, quadColor[3]);
374         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
375         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
376         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
377      }
378      break;
379   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
380      {
381         float inv_alpha[4];
382         VEC4_SUB(inv_alpha, one, dest[3]);
383         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
384         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
385         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
386      }
387      break;
388   case PIPE_BLENDFACTOR_INV_DST_COLOR:
389      {
390         float inv_comp[4];
391         VEC4_SUB(inv_comp, one, dest[0]); /* R */
392         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
393         VEC4_SUB(inv_comp, one, dest[1]); /* G */
394         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
395         VEC4_SUB(inv_comp, one, dest[2]); /* B */
396         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
397      }
398      break;
399   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
400      {
401         float inv_comp[4];
402         /* R */
403         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
404         VEC4_MUL(source[0], quadColor[0], inv_comp);
405         /* G */
406         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
407         VEC4_MUL(source[1], quadColor[1], inv_comp);
408         /* B */
409         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
410         VEC4_MUL(source[2], quadColor[2], inv_comp);
411      }
412      break;
413   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
414      {
415         float inv_alpha[4];
416         VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
417         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
418         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
419         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
420      }
421      break;
422   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
423      {
424         float inv_comp[4];
425         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
426         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
427         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
428         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
429         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
430         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
431      }
432      break;
433   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
434      {
435         float inv_alpha[4];
436         VEC4_SUB(inv_alpha, one, quadColor2[3]);
437         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
438         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
439         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
440      }
441      break;
442   default:
443      assert(0 && "invalid rgb src factor");
444   }
445
446   /*
447    * Compute src/first term A
448    */
449   switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
450   case PIPE_BLENDFACTOR_ONE:
451      VEC4_COPY(source[3], quadColor[3]); /* A */
452      break;
453   case PIPE_BLENDFACTOR_SRC_COLOR:
454      /* fall-through */
455   case PIPE_BLENDFACTOR_SRC_ALPHA:
456      {
457         const float *alpha = quadColor[3];
458         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
459      }
460      break;
461   case PIPE_BLENDFACTOR_DST_COLOR:
462      /* fall-through */
463   case PIPE_BLENDFACTOR_DST_ALPHA:
464      VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
465      break;
466   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
467      /* multiply alpha by 1.0 */
468      VEC4_COPY(source[3], quadColor[3]); /* A */
469      break;
470   case PIPE_BLENDFACTOR_CONST_COLOR:
471      /* fall-through */
472   case PIPE_BLENDFACTOR_CONST_ALPHA:
473      {
474         float comp[4];
475         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
476         VEC4_MUL(source[3], quadColor[3], comp); /* A */
477      }
478      break;
479   case PIPE_BLENDFACTOR_ZERO:
480      VEC4_COPY(source[3], zero); /* A */
481      break;
482   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
483      /* fall-through */
484   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
485      {
486         float inv_alpha[4];
487         VEC4_SUB(inv_alpha, one, quadColor[3]);
488         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
489      }
490      break;
491   case PIPE_BLENDFACTOR_INV_DST_COLOR:
492      /* fall-through */
493   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
494      {
495         float inv_alpha[4];
496         VEC4_SUB(inv_alpha, one, dest[3]);
497         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
498      }
499      break;
500   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
501      /* fall-through */
502   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
503      {
504         float inv_comp[4];
505         /* A */
506         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
507         VEC4_MUL(source[3], quadColor[3], inv_comp);
508      }
509      break;
510   case PIPE_BLENDFACTOR_SRC1_COLOR:
511      /* fall-through */
512   case PIPE_BLENDFACTOR_SRC1_ALPHA:
513      {
514         const float *alpha = quadColor2[3];
515         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
516      }
517      break;
518   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
519      /* fall-through */
520   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
521      {
522         float inv_alpha[4];
523         VEC4_SUB(inv_alpha, one, quadColor2[3]);
524         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
525      }
526      break;
527   default:
528      assert(0 && "invalid alpha src factor");
529   }
530
531   /* Save the original dest for use in masking */
532   VEC4_COPY(blend_dest[0], dest[0]);
533   VEC4_COPY(blend_dest[1], dest[1]);
534   VEC4_COPY(blend_dest[2], dest[2]);
535   VEC4_COPY(blend_dest[3], dest[3]);
536
537
538   /*
539    * Compute blend_dest/second term RGB
540    */
541   switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
542   case PIPE_BLENDFACTOR_ONE:
543      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
544      break;
545   case PIPE_BLENDFACTOR_SRC_COLOR:
546      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
547      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
548      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
549      break;
550   case PIPE_BLENDFACTOR_SRC_ALPHA:
551      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
552      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
553      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
554      break;
555   case PIPE_BLENDFACTOR_DST_ALPHA:
556      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
557      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
558      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
559      break;
560   case PIPE_BLENDFACTOR_DST_COLOR:
561      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
562      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
563      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
564      break;
565   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
566      {
567         const float *alpha = quadColor[3];
568         float diff[4], temp[4];
569         VEC4_SUB(diff, one, blend_dest[3]);
570         VEC4_MIN(temp, alpha, diff);
571         VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */
572         VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */
573         VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */
574      }
575      break;
576   case PIPE_BLENDFACTOR_CONST_COLOR:
577      {
578         float comp[4];
579         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
580         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
581         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
582         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
583         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
584         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
585      }
586      break;
587   case PIPE_BLENDFACTOR_CONST_ALPHA:
588      {
589         float comp[4];
590         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
591         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
592         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
593         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
594      }
595      break;
596   case PIPE_BLENDFACTOR_ZERO:
597      VEC4_COPY(blend_dest[0], zero); /* R */
598      VEC4_COPY(blend_dest[1], zero); /* G */
599      VEC4_COPY(blend_dest[2], zero); /* B */
600      break;
601   case PIPE_BLENDFACTOR_SRC1_COLOR:
602      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
603      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
604      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
605      break;
606   case PIPE_BLENDFACTOR_SRC1_ALPHA:
607      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
608      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
609      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
610      break;
611   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
612      {
613         float inv_comp[4];
614         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
615         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
616         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
617         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
618         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
619         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
620      }
621      break;
622   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
623      {
624         float one_minus_alpha[TGSI_QUAD_SIZE];
625         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
626         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
627         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
628         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
629      }
630      break;
631   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
632      {
633         float inv_comp[4];
634         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
635         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
636         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
637         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
638      }
639      break;
640   case PIPE_BLENDFACTOR_INV_DST_COLOR:
641      {
642         float inv_comp[4];
643         VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
644         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
645         VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
646         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
647         VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
648         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
649      }
650      break;
651   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
652      {
653         float inv_comp[4];
654         /* R */
655         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
656         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
657         /* G */
658         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
659         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
660         /* B */
661         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
662         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
663      }
664      break;
665   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
666      {
667         float inv_comp[4];
668         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
669         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
670         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
671         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
672      }
673      break;
674   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
675      {
676         float inv_comp[4];
677         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
678         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
679         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
680         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
681         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
682         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
683      }
684      break;
685   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
686      {
687         float one_minus_alpha[TGSI_QUAD_SIZE];
688         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
689         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
690         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
691         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
692      }
693      break;
694   default:
695      assert(0 && "invalid rgb dst factor");
696   }
697
698   /*
699    * Compute blend_dest/second term A
700    */
701   switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
702   case PIPE_BLENDFACTOR_ONE:
703      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
704      break;
705   case PIPE_BLENDFACTOR_SRC_COLOR:
706      /* fall-through */
707   case PIPE_BLENDFACTOR_SRC_ALPHA:
708      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
709      break;
710   case PIPE_BLENDFACTOR_DST_COLOR:
711      /* fall-through */
712   case PIPE_BLENDFACTOR_DST_ALPHA:
713      VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
714      break;
715   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
716      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
717      break;
718   case PIPE_BLENDFACTOR_CONST_COLOR:
719      /* fall-through */
720   case PIPE_BLENDFACTOR_CONST_ALPHA:
721      {
722         float comp[4];
723         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
724         VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
725      }
726      break;
727   case PIPE_BLENDFACTOR_ZERO:
728      VEC4_COPY(blend_dest[3], zero); /* A */
729      break;
730   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
731      /* fall-through */
732   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
733      {
734         float one_minus_alpha[TGSI_QUAD_SIZE];
735         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
736         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
737      }
738      break;
739   case PIPE_BLENDFACTOR_INV_DST_COLOR:
740      /* fall-through */
741   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
742      {
743         float inv_comp[4];
744         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
745         VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
746      }
747      break;
748   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
749      /* fall-through */
750   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
751      {
752         float inv_comp[4];
753         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
754         VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
755      }
756      break;
757   case PIPE_BLENDFACTOR_SRC1_COLOR:
758      /* fall-through */
759   case PIPE_BLENDFACTOR_SRC1_ALPHA:
760      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
761      break;
762   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
763      /* fall-through */
764   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
765      {
766         float one_minus_alpha[TGSI_QUAD_SIZE];
767         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
768         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
769      }
770      break;
771   default:
772      assert(0 && "invalid alpha dst factor");
773   }
774
775   /*
776    * Combine RGB terms
777    */
778   switch (softpipe->blend->rt[blend_index].rgb_func) {
779   case PIPE_BLEND_ADD:
780      VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
781      VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
782      VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
783      break;
784   case PIPE_BLEND_SUBTRACT:
785      VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
786      VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
787      VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
788      break;
789   case PIPE_BLEND_REVERSE_SUBTRACT:
790      VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
791      VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
792      VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
793      break;
794   case PIPE_BLEND_MIN:
795      VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
796      VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
797      VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
798      break;
799   case PIPE_BLEND_MAX:
800      VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
801      VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
802      VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
803      break;
804   default:
805      assert(0 && "invalid rgb blend func");
806   }
807
808   /*
809    * Combine A terms
810    */
811   switch (softpipe->blend->rt[blend_index].alpha_func) {
812   case PIPE_BLEND_ADD:
813      VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
814      break;
815   case PIPE_BLEND_SUBTRACT:
816      VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
817      break;
818   case PIPE_BLEND_REVERSE_SUBTRACT:
819      VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
820      break;
821   case PIPE_BLEND_MIN:
822      VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
823      break;
824   case PIPE_BLEND_MAX:
825      VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
826      break;
827   default:
828      assert(0 && "invalid alpha blend func");
829   }
830}
831
832static void
833colormask_quad(unsigned colormask,
834               float (*quadColor)[4],
835               float (*dest)[4])
836{
837   /* R */
838   if (!(colormask & PIPE_MASK_R))
839      COPY_4V(quadColor[0], dest[0]);
840
841   /* G */
842   if (!(colormask & PIPE_MASK_G))
843      COPY_4V(quadColor[1], dest[1]);
844
845   /* B */
846   if (!(colormask & PIPE_MASK_B))
847      COPY_4V(quadColor[2], dest[2]);
848
849   /* A */
850   if (!(colormask & PIPE_MASK_A))
851      COPY_4V(quadColor[3], dest[3]);
852}
853
854
855/**
856 * Clamp all colors in a quad to [0, 1]
857 */
858static void
859clamp_colors(float (*quadColor)[4])
860{
861   unsigned i, j;
862
863   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
864      for (i = 0; i < 4; i++) {
865         quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
866      }
867   }
868}
869
870
871/**
872 * If we're drawing to a luminance, luminance/alpha or intensity surface
873 * we have to adjust (rebase) the fragment/quad colors before writing them
874 * to the tile cache.  The tile cache always stores RGBA colors but if
875 * we're caching a L/A surface (for example) we need to be sure that R=G=B
876 * so that subsequent reads from the surface cache appear to return L/A
877 * values.
878 * The piglit fbo-blending-formats test will exercise this.
879 */
880static void
881rebase_colors(enum format base_format, float (*quadColor)[4])
882{
883   unsigned i;
884
885   switch (base_format) {
886   case RGB:
887      for (i = 0; i < 4; i++) {
888         /* A = 1 */
889         quadColor[3][i] = 1.0F;
890      }
891      break;
892   case LUMINANCE:
893      for (i = 0; i < 4; i++) {
894         /* B = G = R */
895         quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
896         /* A = 1 */
897         quadColor[3][i] = 1.0F;
898      }
899      break;
900   case LUMINANCE_ALPHA:
901      for (i = 0; i < 4; i++) {
902         /* B = G = R */
903         quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
904      }
905      break;
906   case INTENSITY:
907      for (i = 0; i < 4; i++) {
908         /* A = B = G = R */
909         quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
910      }
911      break;
912   default:
913      ; /* nothing */
914   }
915}
916
917static void
918blend_fallback(struct quad_stage *qs,
919               struct quad_header *quads[],
920               unsigned nr)
921{
922   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
923   struct softpipe_context *softpipe = qs->softpipe;
924   const struct pipe_blend_state *blend = softpipe->blend;
925   unsigned cbuf;
926   boolean write_all;
927
928   write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
929
930   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
931   {
932      /* which blend/mask state index to use: */
933      const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
934      float dest[4][TGSI_QUAD_SIZE];
935      struct softpipe_cached_tile *tile
936         = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
937                              quads[0]->input.x0,
938                              quads[0]->input.y0);
939      const boolean clamp = bqs->clamp[cbuf];
940      const float *blend_color;
941      const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
942      uint q, i, j;
943
944      if (clamp)
945         blend_color = softpipe->blend_color_clamped.color;
946      else
947         blend_color = softpipe->blend_color.color;
948
949      for (q = 0; q < nr; q++) {
950         struct quad_header *quad = quads[q];
951         float (*quadColor)[4];
952         float (*quadColor2)[4];
953         float temp_quad_color[TGSI_QUAD_SIZE][4];
954         const int itx = (quad->input.x0 & (TILE_SIZE-1));
955         const int ity = (quad->input.y0 & (TILE_SIZE-1));
956
957         if (write_all) {
958            for (j = 0; j < TGSI_QUAD_SIZE; j++) {
959               for (i = 0; i < 4; i++) {
960                  temp_quad_color[i][j] = quad->output.color[0][i][j];
961               }
962            }
963            quadColor = temp_quad_color;
964         } else {
965            quadColor = quad->output.color[cbuf];
966	    if (dual_source_blend)
967	       quadColor2 = quad->output.color[cbuf + 1];
968         }
969
970         /* If fixed-point dest color buffer, need to clamp the incoming
971          * fragment colors now.
972          */
973         if (clamp || softpipe->rasterizer->clamp_fragment_color) {
974            clamp_colors(quadColor);
975         }
976
977         /* get/swizzle dest colors
978          */
979         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
980            int x = itx + (j & 1);
981            int y = ity + (j >> 1);
982            for (i = 0; i < 4; i++) {
983               dest[i][j] = tile->data.color[y][x][i];
984            }
985         }
986
987
988         if (blend->logicop_enable) {
989            if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
990               logicop_quad( qs, quadColor, dest );
991            }
992         }
993         else if (blend->rt[blend_buf].blend_enable) {
994            blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
995
996            /* If fixed-point dest color buffer, need to clamp the outgoing
997             * fragment colors now.
998             */
999            if (clamp) {
1000               clamp_colors(quadColor);
1001            }
1002         }
1003
1004         rebase_colors(bqs->base_format[cbuf], quadColor);
1005
1006         if (blend->rt[blend_buf].colormask != 0xf)
1007            colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
1008
1009         /* Output color values
1010          */
1011         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1012            if (quad->inout.mask & (1 << j)) {
1013               int x = itx + (j & 1);
1014               int y = ity + (j >> 1);
1015               for (i = 0; i < 4; i++) { /* loop over color chans */
1016                  tile->data.color[y][x][i] = quadColor[i][j];
1017               }
1018            }
1019         }
1020      }
1021   }
1022}
1023
1024
1025static void
1026blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
1027                                         struct quad_header *quads[],
1028                                         unsigned nr)
1029{
1030   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1031   static const float one[4] = { 1, 1, 1, 1 };
1032   float one_minus_alpha[TGSI_QUAD_SIZE];
1033   float dest[4][TGSI_QUAD_SIZE];
1034   float source[4][TGSI_QUAD_SIZE];
1035   uint i, j, q;
1036
1037   struct softpipe_cached_tile *tile
1038      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1039                           quads[0]->input.x0,
1040                           quads[0]->input.y0);
1041
1042   for (q = 0; q < nr; q++) {
1043      struct quad_header *quad = quads[q];
1044      float (*quadColor)[4] = quad->output.color[0];
1045      const float *alpha = quadColor[3];
1046      const int itx = (quad->input.x0 & (TILE_SIZE-1));
1047      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1048
1049      /* get/swizzle dest colors */
1050      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1051         int x = itx + (j & 1);
1052         int y = ity + (j >> 1);
1053         for (i = 0; i < 4; i++) {
1054            dest[i][j] = tile->data.color[y][x][i];
1055         }
1056      }
1057
1058      /* If fixed-point dest color buffer, need to clamp the incoming
1059       * fragment colors now.
1060       */
1061      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1062         clamp_colors(quadColor);
1063      }
1064
1065      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
1066      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
1067      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
1068      VEC4_MUL(source[3], quadColor[3], alpha); /* A */
1069
1070      VEC4_SUB(one_minus_alpha, one, alpha);
1071      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
1072      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
1073      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
1074      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
1075
1076      VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
1077      VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
1078      VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
1079      VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
1080
1081      /* If fixed-point dest color buffer, need to clamp the outgoing
1082       * fragment colors now.
1083       */
1084      if (bqs->clamp[0]) {
1085         clamp_colors(quadColor);
1086      }
1087
1088      rebase_colors(bqs->base_format[0], quadColor);
1089
1090      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1091         if (quad->inout.mask & (1 << j)) {
1092            int x = itx + (j & 1);
1093            int y = ity + (j >> 1);
1094            for (i = 0; i < 4; i++) { /* loop over color chans */
1095               tile->data.color[y][x][i] = quadColor[i][j];
1096            }
1097         }
1098      }
1099   }
1100}
1101
1102static void
1103blend_single_add_one_one(struct quad_stage *qs,
1104                         struct quad_header *quads[],
1105                         unsigned nr)
1106{
1107   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1108   float dest[4][TGSI_QUAD_SIZE];
1109   uint i, j, q;
1110
1111   struct softpipe_cached_tile *tile
1112      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1113                           quads[0]->input.x0,
1114                           quads[0]->input.y0);
1115
1116   for (q = 0; q < nr; q++) {
1117      struct quad_header *quad = quads[q];
1118      float (*quadColor)[4] = quad->output.color[0];
1119      const int itx = (quad->input.x0 & (TILE_SIZE-1));
1120      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1121
1122      /* get/swizzle dest colors */
1123      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1124         int x = itx + (j & 1);
1125         int y = ity + (j >> 1);
1126         for (i = 0; i < 4; i++) {
1127            dest[i][j] = tile->data.color[y][x][i];
1128         }
1129      }
1130
1131      /* If fixed-point dest color buffer, need to clamp the incoming
1132       * fragment colors now.
1133       */
1134      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1135         clamp_colors(quadColor);
1136      }
1137
1138      VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
1139      VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
1140      VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
1141      VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
1142
1143      /* If fixed-point dest color buffer, need to clamp the outgoing
1144       * fragment colors now.
1145       */
1146      if (bqs->clamp[0]) {
1147         clamp_colors(quadColor);
1148      }
1149
1150      rebase_colors(bqs->base_format[0], quadColor);
1151
1152      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1153         if (quad->inout.mask & (1 << j)) {
1154            int x = itx + (j & 1);
1155            int y = ity + (j >> 1);
1156            for (i = 0; i < 4; i++) { /* loop over color chans */
1157               tile->data.color[y][x][i] = quadColor[i][j];
1158            }
1159         }
1160      }
1161   }
1162}
1163
1164
1165/**
1166 * Just copy the quad color to the framebuffer tile (respecting the writemask),
1167 * for one color buffer.
1168 * Clamping will be done, if needed (depending on the color buffer's
1169 * datatype) when we write/pack the colors later.
1170 */
1171static void
1172single_output_color(struct quad_stage *qs,
1173                    struct quad_header *quads[],
1174                    unsigned nr)
1175{
1176   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1177   uint i, j, q;
1178
1179   struct softpipe_cached_tile *tile
1180      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1181                           quads[0]->input.x0,
1182                           quads[0]->input.y0);
1183
1184   for (q = 0; q < nr; q++) {
1185      struct quad_header *quad = quads[q];
1186      float (*quadColor)[4] = quad->output.color[0];
1187      const int itx = (quad->input.x0 & (TILE_SIZE-1));
1188      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1189
1190      if (qs->softpipe->rasterizer->clamp_fragment_color)
1191         clamp_colors(quadColor);
1192
1193      rebase_colors(bqs->base_format[0], quadColor);
1194
1195      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1196         if (quad->inout.mask & (1 << j)) {
1197            int x = itx + (j & 1);
1198            int y = ity + (j >> 1);
1199            for (i = 0; i < 4; i++) { /* loop over color chans */
1200               tile->data.color[y][x][i] = quadColor[i][j];
1201            }
1202         }
1203      }
1204   }
1205}
1206
/**
 * Do-nothing run function; choose_blend_quad() installs it when the
 * framebuffer has no color buffers.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty: nothing is read or written */
   (void) qs;
   (void) quads;
   (void) nr;
}
1213
1214
1215static void
1216choose_blend_quad(struct quad_stage *qs,
1217                  struct quad_header *quads[],
1218                  unsigned nr)
1219{
1220   struct blend_quad_stage *bqs = blend_quad_stage(qs);
1221   struct softpipe_context *softpipe = qs->softpipe;
1222   const struct pipe_blend_state *blend = softpipe->blend;
1223   unsigned i;
1224
1225   qs->run = blend_fallback;
1226
1227   if (softpipe->framebuffer.nr_cbufs == 0) {
1228      qs->run = blend_noop;
1229   }
1230   else if (!softpipe->blend->logicop_enable &&
1231            softpipe->blend->rt[0].colormask == 0xf &&
1232            softpipe->framebuffer.nr_cbufs == 1)
1233   {
1234      if (!blend->rt[0].blend_enable) {
1235         qs->run = single_output_color;
1236      }
1237      else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1238               blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1239               blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1240      {
1241         if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1242            if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1243                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1244               qs->run = blend_single_add_one_one;
1245            }
1246            else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1247                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1248               qs->run = blend_single_add_src_alpha_inv_src_alpha;
1249
1250         }
1251      }
1252   }
1253
1254   /* For each color buffer, determine if the buffer has destination alpha and
1255    * whether color clamping is needed.
1256    */
1257   for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
1258      const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
1259      const struct util_format_description *desc =
1260         util_format_description(format);
1261      /* assuming all or no color channels are normalized: */
1262      bqs->clamp[i] = desc->channel[0].normalized;
1263      bqs->format_type[i] = desc->channel[0].type;
1264
1265      if (util_format_is_intensity(format))
1266         bqs->base_format[i] = INTENSITY;
1267      else if (util_format_is_luminance(format))
1268         bqs->base_format[i] = LUMINANCE;
1269      else if (util_format_is_luminance_alpha(format))
1270         bqs->base_format[i] = LUMINANCE_ALPHA;
1271      else if (util_format_is_rgb_no_alpha(format))
1272         bqs->base_format[i] = RGB;
1273      else
1274         bqs->base_format[i] = RGBA;
1275   }
1276
1277   qs->run(qs, quads, nr);
1278}
1279
1280
1281static void blend_begin(struct quad_stage *qs)
1282{
1283   qs->run = choose_blend_quad;
1284}
1285
1286
/**
 * Destroy hook of the blend stage: release the stage's memory.
 * \p qs is the base member, which is the first member of
 * struct blend_quad_stage.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1291
1292
1293struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1294{
1295   struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
1296
1297   if (!stage)
1298      return NULL;
1299
1300   stage->base.softpipe = softpipe;
1301   stage->base.begin = blend_begin;
1302   stage->base.run = choose_blend_quad;
1303   stage->base.destroy = blend_destroy;
1304
1305   return &stage->base;
1306}
1307