sp_quad_blend.c revision f79e3518b4e39cd27f679c402e715154f63107f6
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * quad blending
30 * \author Brian Paul
31 */
32
33#include "pipe/p_defines.h"
34#include "util/u_math.h"
35#include "util/u_memory.h"
36#include "util/u_format.h"
37#include "sp_context.h"
38#include "sp_state.h"
39#include "sp_quad.h"
40#include "sp_tile_cache.h"
41#include "sp_quad_pipe.h"
42
43
/**
 * Copy the four elements of SRC into DST.
 * Arguments are parenthesized so that pointer expressions such as
 * (base + 4) can be passed safely.  Each argument is evaluated four times.
 */
#define VEC4_COPY(DST, SRC) \
do { \
    (DST)[0] = (SRC)[0]; \
    (DST)[1] = (SRC)[1]; \
    (DST)[2] = (SRC)[2]; \
    (DST)[3] = (SRC)[3]; \
} while(0)
51
/**
 * Store the scalar SRC in all four elements of DST.
 * SRC is evaluated exactly once (it is converted to float and cached), so
 * expressions with side effects or non-trivial cost are safe to pass.
 */
#define VEC4_SCALAR(DST, SRC) \
do { \
    const float vec4_scalar_tmp_ = (SRC); \
    (DST)[0] = vec4_scalar_tmp_; \
    (DST)[1] = vec4_scalar_tmp_; \
    (DST)[2] = vec4_scalar_tmp_; \
    (DST)[3] = vec4_scalar_tmp_; \
} while(0)
59
/**
 * Element-wise R = A + B for four-element vectors.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)
67
/**
 * Element-wise R = A - B for four-element vectors.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)
75
/**
 * Add and limit result to ceiling of 1.0.
 * Only the upper bound is enforced; results below 0.0 are left as-is.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)
84
/**
 * Subtract and limit result to floor of 0.0.
 * Only the lower bound is enforced; results above 1.0 are left as-is.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)
93
/**
 * Element-wise R = A * B for four-element vectors.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)
101
/**
 * Element-wise R = min(A, B) for four-element vectors.
 * When the elements compare equal (or unordered) the element of B is taken.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
109
/**
 * Element-wise R = max(A, B) for four-element vectors.
 * When the elements compare equal (or unordered) the element of B is taken.
 * Arguments are parenthesized so pointer expressions can be passed safely.
 */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
117
118
119
120static void
121logicop_quad(struct quad_stage *qs,
122             float (*quadColor)[4],
123             float (*dest)[4])
124{
125   struct softpipe_context *softpipe = qs->softpipe;
126   ubyte src[4][4], dst[4][4], res[4][4];
127   uint *src4 = (uint *) src;
128   uint *dst4 = (uint *) dst;
129   uint *res4 = (uint *) res;
130   uint j;
131
132
133   /* convert to ubyte */
134   for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
135      dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
136      dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
137      dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
138      dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
139
140      src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
141      src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
142      src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
143      src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
144   }
145
146   switch (softpipe->blend->logicop_func) {
147   case PIPE_LOGICOP_CLEAR:
148      for (j = 0; j < 4; j++)
149         res4[j] = 0;
150      break;
151   case PIPE_LOGICOP_NOR:
152      for (j = 0; j < 4; j++)
153         res4[j] = ~(src4[j] | dst4[j]);
154      break;
155   case PIPE_LOGICOP_AND_INVERTED:
156      for (j = 0; j < 4; j++)
157         res4[j] = ~src4[j] & dst4[j];
158      break;
159   case PIPE_LOGICOP_COPY_INVERTED:
160      for (j = 0; j < 4; j++)
161         res4[j] = ~src4[j];
162      break;
163   case PIPE_LOGICOP_AND_REVERSE:
164      for (j = 0; j < 4; j++)
165         res4[j] = src4[j] & ~dst4[j];
166      break;
167   case PIPE_LOGICOP_INVERT:
168      for (j = 0; j < 4; j++)
169         res4[j] = ~dst4[j];
170      break;
171   case PIPE_LOGICOP_XOR:
172      for (j = 0; j < 4; j++)
173         res4[j] = dst4[j] ^ src4[j];
174      break;
175   case PIPE_LOGICOP_NAND:
176      for (j = 0; j < 4; j++)
177         res4[j] = ~(src4[j] & dst4[j]);
178      break;
179   case PIPE_LOGICOP_AND:
180      for (j = 0; j < 4; j++)
181         res4[j] = src4[j] & dst4[j];
182      break;
183   case PIPE_LOGICOP_EQUIV:
184      for (j = 0; j < 4; j++)
185         res4[j] = ~(src4[j] ^ dst4[j]);
186      break;
187   case PIPE_LOGICOP_NOOP:
188      for (j = 0; j < 4; j++)
189         res4[j] = dst4[j];
190      break;
191   case PIPE_LOGICOP_OR_INVERTED:
192      for (j = 0; j < 4; j++)
193         res4[j] = ~src4[j] | dst4[j];
194      break;
195   case PIPE_LOGICOP_COPY:
196      for (j = 0; j < 4; j++)
197         res4[j] = src4[j];
198      break;
199   case PIPE_LOGICOP_OR_REVERSE:
200      for (j = 0; j < 4; j++)
201         res4[j] = src4[j] | ~dst4[j];
202      break;
203   case PIPE_LOGICOP_OR:
204      for (j = 0; j < 4; j++)
205         res4[j] = src4[j] | dst4[j];
206      break;
207   case PIPE_LOGICOP_SET:
208      for (j = 0; j < 4; j++)
209         res4[j] = ~0;
210      break;
211   default:
212      assert(0 && "invalid logicop mode");
213   }
214
215   for (j = 0; j < 4; j++) {
216      quadColor[j][0] = ubyte_to_float(res[j][0]);
217      quadColor[j][1] = ubyte_to_float(res[j][1]);
218      quadColor[j][2] = ubyte_to_float(res[j][2]);
219      quadColor[j][3] = ubyte_to_float(res[j][3]);
220   }
221}
222
223
224
225/**
226 * Do blending for a 2x2 quad for one color buffer.
227 * \param quadColor  the incoming quad colors
228 * \param dest  the destination/framebuffer quad colors
229 * \param blend_index  which set of blending terms to use
230 * \param has_dst_alpha  does the dest color buffer have an alpha channel?
231 */
232static void
233blend_quad(struct quad_stage *qs,
234           float (*quadColor)[4],
235           float (*dest)[4],
236           unsigned blend_index,
237           boolean has_dst_alpha)
238{
239   static const float zero[4] = { 0, 0, 0, 0 };
240   static const float one[4] = { 1, 1, 1, 1 };
241   struct softpipe_context *softpipe = qs->softpipe;
242   float source[4][QUAD_SIZE] = { { 0 } };
243   float blend_dest[4][QUAD_SIZE];
244
245   /*
246    * Compute src/first term RGB
247    */
248   switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
249   case PIPE_BLENDFACTOR_ONE:
250      VEC4_COPY(source[0], quadColor[0]); /* R */
251      VEC4_COPY(source[1], quadColor[1]); /* G */
252      VEC4_COPY(source[2], quadColor[2]); /* B */
253      break;
254   case PIPE_BLENDFACTOR_SRC_COLOR:
255      VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
256      VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
257      VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
258      break;
259   case PIPE_BLENDFACTOR_SRC_ALPHA:
260   {
261      const float *alpha = quadColor[3];
262      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
263      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
264      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
265   }
266   break;
267   case PIPE_BLENDFACTOR_DST_COLOR:
268      VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
269      VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
270      VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
271      break;
272   case PIPE_BLENDFACTOR_DST_ALPHA:
273      if (has_dst_alpha) {
274         const float *alpha = dest[3];
275         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
276         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
277         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
278      }
279      else {
280         VEC4_COPY(source[0], quadColor[0]); /* R */
281         VEC4_COPY(source[1], quadColor[1]); /* G */
282         VEC4_COPY(source[2], quadColor[2]); /* B */
283      }
284      break;
285   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
286      if (has_dst_alpha) {
287         const float *alpha = quadColor[3];
288         float diff[4], temp[4];
289         VEC4_SUB(diff, one, dest[3]);
290         VEC4_MIN(temp, alpha, diff);
291         VEC4_MUL(source[0], quadColor[0], temp); /* R */
292         VEC4_MUL(source[1], quadColor[1], temp); /* G */
293         VEC4_MUL(source[2], quadColor[2], temp); /* B */
294      }
295      else {
296         VEC4_COPY(source[0], zero); /* R */
297         VEC4_COPY(source[1], zero); /* G */
298         VEC4_COPY(source[2], zero); /* B */
299      }
300      break;
301   case PIPE_BLENDFACTOR_CONST_COLOR:
302   {
303      float comp[4];
304      VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
305      VEC4_MUL(source[0], quadColor[0], comp); /* R */
306      VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
307      VEC4_MUL(source[1], quadColor[1], comp); /* G */
308      VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
309      VEC4_MUL(source[2], quadColor[2], comp); /* B */
310   }
311   break;
312   case PIPE_BLENDFACTOR_CONST_ALPHA:
313   {
314      float alpha[4];
315      VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
316      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
317      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
318      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
319   }
320   break;
321   case PIPE_BLENDFACTOR_SRC1_COLOR:
322      assert(0); /* to do */
323      break;
324   case PIPE_BLENDFACTOR_SRC1_ALPHA:
325      assert(0); /* to do */
326      break;
327   case PIPE_BLENDFACTOR_ZERO:
328      VEC4_COPY(source[0], zero); /* R */
329      VEC4_COPY(source[1], zero); /* G */
330      VEC4_COPY(source[2], zero); /* B */
331      break;
332   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
333   {
334      float inv_comp[4];
335      VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
336      VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
337      VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
338      VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
339      VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
340      VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
341   }
342   break;
343   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
344   {
345      float inv_alpha[4];
346      VEC4_SUB(inv_alpha, one, quadColor[3]);
347      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
348      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
349      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
350   }
351   break;
352   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
353      if (has_dst_alpha) {
354         float inv_alpha[4];
355         VEC4_SUB(inv_alpha, one, dest[3]);
356         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
357         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
358         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
359      }
360      else {
361         VEC4_COPY(source[0], zero); /* R */
362         VEC4_COPY(source[1], zero); /* G */
363         VEC4_COPY(source[2], zero); /* B */
364      }
365      break;
366   case PIPE_BLENDFACTOR_INV_DST_COLOR:
367   {
368      float inv_comp[4];
369      VEC4_SUB(inv_comp, one, dest[0]); /* R */
370      VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
371      VEC4_SUB(inv_comp, one, dest[1]); /* G */
372      VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
373      VEC4_SUB(inv_comp, one, dest[2]); /* B */
374      VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
375   }
376   break;
377   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
378   {
379      float inv_comp[4];
380      /* R */
381      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
382      VEC4_MUL(source[0], quadColor[0], inv_comp);
383      /* G */
384      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
385      VEC4_MUL(source[1], quadColor[1], inv_comp);
386      /* B */
387      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
388      VEC4_MUL(source[2], quadColor[2], inv_comp);
389   }
390   break;
391   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
392   {
393      float inv_alpha[4];
394      VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
395      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
396      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
397      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
398   }
399   break;
400   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
401      assert(0); /* to do */
402      break;
403   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
404      assert(0); /* to do */
405      break;
406   default:
407      assert(0 && "invalid rgb src factor");
408   }
409
410   /*
411    * Compute src/first term A
412    */
413   switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
414   case PIPE_BLENDFACTOR_ONE:
415      VEC4_COPY(source[3], quadColor[3]); /* A */
416      break;
417   case PIPE_BLENDFACTOR_SRC_COLOR:
418      /* fall-through */
419   case PIPE_BLENDFACTOR_SRC_ALPHA:
420   {
421      const float *alpha = quadColor[3];
422      VEC4_MUL(source[3], quadColor[3], alpha); /* A */
423   }
424   break;
425   case PIPE_BLENDFACTOR_DST_COLOR:
426      /* fall-through */
427   case PIPE_BLENDFACTOR_DST_ALPHA:
428      if (has_dst_alpha)
429         VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
430      else
431         VEC4_COPY(source[3], quadColor[3]); /* A */
432      break;
433   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
434      /* multiply alpha by 1.0 */
435      VEC4_COPY(source[3], quadColor[3]); /* A */
436      break;
437   case PIPE_BLENDFACTOR_CONST_COLOR:
438      /* fall-through */
439   case PIPE_BLENDFACTOR_CONST_ALPHA:
440   {
441      float comp[4];
442      VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
443      VEC4_MUL(source[3], quadColor[3], comp); /* A */
444   }
445   break;
446   case PIPE_BLENDFACTOR_ZERO:
447      VEC4_COPY(source[3], zero); /* A */
448      break;
449   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
450      /* fall-through */
451   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
452   {
453      float inv_alpha[4];
454      VEC4_SUB(inv_alpha, one, quadColor[3]);
455      VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
456   }
457   break;
458   case PIPE_BLENDFACTOR_INV_DST_COLOR:
459      /* fall-through */
460   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
461      if (has_dst_alpha) {
462         float inv_alpha[4];
463         VEC4_SUB(inv_alpha, one, dest[3]);
464         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
465      }
466      else {
467         VEC4_COPY(source[3], zero); /* A */
468      }
469      break;
470   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
471      /* fall-through */
472   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
473   {
474      float inv_comp[4];
475      /* A */
476      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
477      VEC4_MUL(source[3], quadColor[3], inv_comp);
478   }
479   break;
480   default:
481      assert(0 && "invalid alpha src factor");
482   }
483
484   /* Save the original dest for use in masking */
485   VEC4_COPY(blend_dest[0], dest[0]);
486   VEC4_COPY(blend_dest[1], dest[1]);
487   VEC4_COPY(blend_dest[2], dest[2]);
488   VEC4_COPY(blend_dest[3], dest[3]);
489
490
491   /*
492    * Compute blend_dest/second term RGB
493    */
494   switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
495   case PIPE_BLENDFACTOR_ONE:
496      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
497      break;
498   case PIPE_BLENDFACTOR_SRC_COLOR:
499      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
500      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
501      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
502      break;
503   case PIPE_BLENDFACTOR_SRC_ALPHA:
504      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
505      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
506      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
507      break;
508   case PIPE_BLENDFACTOR_DST_ALPHA:
509      if (has_dst_alpha) {
510         VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
511         VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
512         VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
513      }
514      else {
515         /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
516      }
517      break;
518   case PIPE_BLENDFACTOR_DST_COLOR:
519      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
520      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
521      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
522      break;
523   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
524      if (has_dst_alpha) {
525         const float *alpha = quadColor[3];
526         float diff[4], temp[4];
527         VEC4_SUB(diff, one, blend_dest[3]);
528         VEC4_MIN(temp, alpha, diff);
529         VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */
530         VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */
531         VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */
532      }
533      else {
534         VEC4_COPY(blend_dest[0], zero); /* R */
535         VEC4_COPY(blend_dest[1], zero); /* G */
536         VEC4_COPY(blend_dest[2], zero); /* B */
537      }
538      break;
539   case PIPE_BLENDFACTOR_CONST_COLOR:
540   {
541      float comp[4];
542      VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
543      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
544      VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
545      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
546      VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
547      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
548   }
549   break;
550   case PIPE_BLENDFACTOR_CONST_ALPHA:
551   {
552      float comp[4];
553      VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
554      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
555      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
556      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
557   }
558   break;
559   case PIPE_BLENDFACTOR_ZERO:
560      VEC4_COPY(blend_dest[0], zero); /* R */
561      VEC4_COPY(blend_dest[1], zero); /* G */
562      VEC4_COPY(blend_dest[2], zero); /* B */
563      break;
564   case PIPE_BLENDFACTOR_SRC1_COLOR:
565   case PIPE_BLENDFACTOR_SRC1_ALPHA:
566      /* XXX what are these? */
567      assert(0);
568      break;
569   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
570   {
571      float inv_comp[4];
572      VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
573      VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
574      VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
575      VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
576      VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
577      VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
578   }
579   break;
580   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
581   {
582      float one_minus_alpha[QUAD_SIZE];
583      VEC4_SUB(one_minus_alpha, one, quadColor[3]);
584      VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
585      VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
586      VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
587   }
588   break;
589   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
590      if (has_dst_alpha) {
591         float inv_comp[4];
592         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
593         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
594         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
595         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
596      }
597      else {
598         VEC4_COPY(blend_dest[0], zero); /* R */
599         VEC4_COPY(blend_dest[1], zero); /* G */
600         VEC4_COPY(blend_dest[2], zero); /* B */
601      }
602   break;
603   case PIPE_BLENDFACTOR_INV_DST_COLOR:
604   {
605      float inv_comp[4];
606      VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
607      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
608      VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
609      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
610      VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
611      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
612   }
613   break;
614   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
615   {
616      float inv_comp[4];
617      /* R */
618      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
619      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
620      /* G */
621      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
622      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
623      /* B */
624      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
625      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
626   }
627   break;
628   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
629   {
630      float inv_comp[4];
631      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
632      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
633      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
634      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
635   }
636   break;
637   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
638   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
639      /* XXX what are these? */
640      assert(0);
641      break;
642   default:
643      assert(0 && "invalid rgb dst factor");
644   }
645
646   /*
647    * Compute blend_dest/second term A
648    */
649   switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
650   case PIPE_BLENDFACTOR_ONE:
651      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
652      break;
653   case PIPE_BLENDFACTOR_SRC_COLOR:
654      /* fall-through */
655   case PIPE_BLENDFACTOR_SRC_ALPHA:
656      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
657      break;
658   case PIPE_BLENDFACTOR_DST_COLOR:
659      /* fall-through */
660   case PIPE_BLENDFACTOR_DST_ALPHA:
661      if (has_dst_alpha) {
662         VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
663      }
664      else {
665         /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
666      }
667      break;
668   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
669      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
670      break;
671   case PIPE_BLENDFACTOR_CONST_COLOR:
672      /* fall-through */
673   case PIPE_BLENDFACTOR_CONST_ALPHA:
674   {
675      float comp[4];
676      VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
677      VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
678   }
679   break;
680   case PIPE_BLENDFACTOR_ZERO:
681      VEC4_COPY(blend_dest[3], zero); /* A */
682      break;
683   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
684      /* fall-through */
685   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
686   {
687      float one_minus_alpha[QUAD_SIZE];
688      VEC4_SUB(one_minus_alpha, one, quadColor[3]);
689      VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
690   }
691   break;
692   case PIPE_BLENDFACTOR_INV_DST_COLOR:
693      /* fall-through */
694   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
695      if (has_dst_alpha) {
696         float inv_comp[4];
697         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
698         VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
699      }
700      else {
701         VEC4_COPY(blend_dest[3], zero); /* A */
702      }
703      break;
704   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
705      /* fall-through */
706   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
707   {
708      float inv_comp[4];
709      VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
710      VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
711   }
712   break;
713   default:
714      assert(0 && "invalid alpha dst factor");
715   }
716
717   /*
718    * Combine RGB terms
719    */
720   switch (softpipe->blend->rt[blend_index].rgb_func) {
721   case PIPE_BLEND_ADD:
722      VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
723      VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
724      VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
725      break;
726   case PIPE_BLEND_SUBTRACT:
727      VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
728      VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
729      VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
730      break;
731   case PIPE_BLEND_REVERSE_SUBTRACT:
732      VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */
733      VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */
734      VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */
735      break;
736   case PIPE_BLEND_MIN:
737      VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
738      VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
739      VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
740      break;
741   case PIPE_BLEND_MAX:
742      VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
743      VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
744      VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
745      break;
746   default:
747      assert(0 && "invalid rgb blend func");
748   }
749
750   /*
751    * Combine A terms
752    */
753   switch (softpipe->blend->rt[blend_index].alpha_func) {
754   case PIPE_BLEND_ADD:
755      VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
756      break;
757   case PIPE_BLEND_SUBTRACT:
758      VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
759      break;
760   case PIPE_BLEND_REVERSE_SUBTRACT:
761      VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */
762      break;
763   case PIPE_BLEND_MIN:
764      VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
765      break;
766   case PIPE_BLEND_MAX:
767      VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
768      break;
769   default:
770      assert(0 && "invalid alpha blend func");
771   }
772}
773
774static void
775colormask_quad(unsigned colormask,
776               float (*quadColor)[4],
777               float (*dest)[4])
778{
779   /* R */
780   if (!(colormask & PIPE_MASK_R))
781      COPY_4V(quadColor[0], dest[0]);
782
783   /* G */
784   if (!(colormask & PIPE_MASK_G))
785      COPY_4V(quadColor[1], dest[1]);
786
787   /* B */
788   if (!(colormask & PIPE_MASK_B))
789      COPY_4V(quadColor[2], dest[2]);
790
791   /* A */
792   if (!(colormask & PIPE_MASK_A))
793      COPY_4V(quadColor[3], dest[3]);
794}
795
796
797static void
798blend_fallback(struct quad_stage *qs,
799               struct quad_header *quads[],
800               unsigned nr)
801{
802   struct softpipe_context *softpipe = qs->softpipe;
803   const struct pipe_blend_state *blend = softpipe->blend;
804   unsigned cbuf;
805   boolean write_all;
806
807   write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
808
809   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
810   {
811      /* which blend/mask state index to use: */
812      const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
813      float dest[4][QUAD_SIZE];
814      struct softpipe_cached_tile *tile
815         = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
816                              quads[0]->input.x0,
817                              quads[0]->input.y0);
818      boolean has_dst_alpha
819         = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
820      uint q, i, j;
821
822      for (q = 0; q < nr; q++) {
823         struct quad_header *quad = quads[q];
824         float (*quadColor)[4];
825         float temp_quad_color[QUAD_SIZE][4];
826         const int itx = (quad->input.x0 & (TILE_SIZE-1));
827         const int ity = (quad->input.y0 & (TILE_SIZE-1));
828
829         if (write_all) {
830            for (j = 0; j < QUAD_SIZE; j++) {
831               for (i = 0; i < 4; i++) {
832                  temp_quad_color[i][j] = quad->output.color[0][i][j];
833               }
834            }
835            quadColor = temp_quad_color;
836         } else {
837            quadColor = quad->output.color[cbuf];
838         }
839
840         /* get/swizzle dest colors
841          */
842         for (j = 0; j < QUAD_SIZE; j++) {
843            int x = itx + (j & 1);
844            int y = ity + (j >> 1);
845            for (i = 0; i < 4; i++) {
846               dest[i][j] = tile->data.color[y][x][i];
847            }
848         }
849
850
851         if (blend->logicop_enable) {
852            logicop_quad( qs, quadColor, dest );
853         }
854         else if (blend->rt[blend_buf].blend_enable) {
855            blend_quad( qs, quadColor, dest, blend_buf, has_dst_alpha );
856         }
857
858         if (blend->rt[blend_buf].colormask != 0xf)
859            colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
860
861         /* Output color values
862          */
863         for (j = 0; j < QUAD_SIZE; j++) {
864            if (quad->inout.mask & (1 << j)) {
865               int x = itx + (j & 1);
866               int y = ity + (j >> 1);
867               for (i = 0; i < 4; i++) { /* loop over color chans */
868                  tile->data.color[y][x][i] = quadColor[i][j];
869               }
870            }
871         }
872      }
873   }
874}
875
876
877static void
878blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
879                                         struct quad_header *quads[],
880                                         unsigned nr)
881{
882   static const float one[4] = { 1, 1, 1, 1 };
883   float one_minus_alpha[QUAD_SIZE];
884   float dest[4][QUAD_SIZE];
885   float source[4][QUAD_SIZE];
886   uint i, j, q;
887
888   struct softpipe_cached_tile *tile
889      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
890                           quads[0]->input.x0,
891                           quads[0]->input.y0);
892
893   for (q = 0; q < nr; q++) {
894      struct quad_header *quad = quads[q];
895      float (*quadColor)[4] = quad->output.color[0];
896      const float *alpha = quadColor[3];
897      const int itx = (quad->input.x0 & (TILE_SIZE-1));
898      const int ity = (quad->input.y0 & (TILE_SIZE-1));
899
900      /* get/swizzle dest colors */
901      for (j = 0; j < QUAD_SIZE; j++) {
902         int x = itx + (j & 1);
903         int y = ity + (j >> 1);
904         for (i = 0; i < 4; i++) {
905            dest[i][j] = tile->data.color[y][x][i];
906         }
907      }
908
909      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
910      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
911      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
912      VEC4_MUL(source[3], quadColor[3], alpha); /* A */
913
914      VEC4_SUB(one_minus_alpha, one, alpha);
915      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
916      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
917      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
918      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */
919
920      VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
921      VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
922      VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
923      VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
924
925      for (j = 0; j < QUAD_SIZE; j++) {
926         if (quad->inout.mask & (1 << j)) {
927            int x = itx + (j & 1);
928            int y = ity + (j >> 1);
929            for (i = 0; i < 4; i++) { /* loop over color chans */
930               tile->data.color[y][x][i] = quadColor[i][j];
931            }
932         }
933      }
934   }
935}
936
937static void
938blend_single_add_one_one(struct quad_stage *qs,
939                         struct quad_header *quads[],
940                         unsigned nr)
941{
942   float dest[4][QUAD_SIZE];
943   uint i, j, q;
944
945   struct softpipe_cached_tile *tile
946      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
947                           quads[0]->input.x0,
948                           quads[0]->input.y0);
949
950   for (q = 0; q < nr; q++) {
951      struct quad_header *quad = quads[q];
952      float (*quadColor)[4] = quad->output.color[0];
953      const int itx = (quad->input.x0 & (TILE_SIZE-1));
954      const int ity = (quad->input.y0 & (TILE_SIZE-1));
955
956      /* get/swizzle dest colors */
957      for (j = 0; j < QUAD_SIZE; j++) {
958         int x = itx + (j & 1);
959         int y = ity + (j >> 1);
960         for (i = 0; i < 4; i++) {
961            dest[i][j] = tile->data.color[y][x][i];
962         }
963      }
964
965      VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */
966      VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
967      VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
968      VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
969
970      for (j = 0; j < QUAD_SIZE; j++) {
971         if (quad->inout.mask & (1 << j)) {
972            int x = itx + (j & 1);
973            int y = ity + (j >> 1);
974            for (i = 0; i < 4; i++) { /* loop over color chans */
975               tile->data.color[y][x][i] = quadColor[i][j];
976            }
977         }
978      }
979   }
980}
981
982
983static void
984single_output_color(struct quad_stage *qs,
985                    struct quad_header *quads[],
986                    unsigned nr)
987{
988   uint i, j, q;
989
990   struct softpipe_cached_tile *tile
991      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
992                           quads[0]->input.x0,
993                           quads[0]->input.y0);
994
995   for (q = 0; q < nr; q++) {
996      struct quad_header *quad = quads[q];
997      float (*quadColor)[4] = quad->output.color[0];
998      const int itx = (quad->input.x0 & (TILE_SIZE-1));
999      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1000
1001      for (j = 0; j < QUAD_SIZE; j++) {
1002         if (quad->inout.mask & (1 << j)) {
1003            int x = itx + (j & 1);
1004            int y = ity + (j >> 1);
1005            for (i = 0; i < 4; i++) { /* loop over color chans */
1006               tile->data.color[y][x][i] = quadColor[i][j];
1007            }
1008         }
1009      }
1010   }
1011}
1012
/**
 * Do-nothing run function.  choose_blend_quad() installs it when the
 * framebuffer has no color buffers, so incoming quads are simply dropped.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty: none of the arguments are used */
   (void) qs;
   (void) quads;
   (void) nr;
}
1019
1020
1021static void
1022choose_blend_quad(struct quad_stage *qs,
1023                  struct quad_header *quads[],
1024                  unsigned nr)
1025{
1026   struct softpipe_context *softpipe = qs->softpipe;
1027   const struct pipe_blend_state *blend = softpipe->blend;
1028
1029   qs->run = blend_fallback;
1030
1031   if (softpipe->framebuffer.nr_cbufs == 0) {
1032      qs->run = blend_noop;
1033   }
1034   else if (!softpipe->blend->logicop_enable &&
1035            softpipe->blend->rt[0].colormask == 0xf &&
1036            softpipe->framebuffer.nr_cbufs == 1)
1037   {
1038      if (!blend->rt[0].blend_enable) {
1039         qs->run = single_output_color;
1040      }
1041      else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1042               blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1043               blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1044      {
1045         if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1046            if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1047                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1048               qs->run = blend_single_add_one_one;
1049            }
1050            else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1051                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1052               qs->run = blend_single_add_src_alpha_inv_src_alpha;
1053
1054         }
1055      }
1056   }
1057
1058   qs->run(qs, quads, nr);
1059}
1060
1061
1062static void blend_begin(struct quad_stage *qs)
1063{
1064   qs->run = choose_blend_quad;
1065}
1066
1067
/**
 * Stage 'destroy' hook: release the stage object itself.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1072
1073
1074struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1075{
1076   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1077
1078   stage->softpipe = softpipe;
1079   stage->begin = blend_begin;
1080   stage->run = choose_blend_quad;
1081   stage->destroy = blend_destroy;
1082
1083   return stage;
1084}
1085