lp_bld_blend_aos.c revision f4b3430a36ea88707f59ce147a140a1a559378c5
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46#include "pipe/p_state.h"
47#include "util/u_debug.h"
48#include "util/u_format.h"
49
50#include "gallivm/lp_bld_type.h"
51#include "gallivm/lp_bld_const.h"
52#include "gallivm/lp_bld_arit.h"
53#include "gallivm/lp_bld_logic.h"
54#include "gallivm/lp_bld_swizzle.h"
55#include "gallivm/lp_bld_bitarit.h"
56#include "gallivm/lp_bld_debug.h"
57
58#include "lp_bld_blend.h"
59
60
61/**
62 * We may the same values several times, so we keep them here to avoid
63 * recomputing them. Also reusing the values allows us to do simplifications
64 * that LLVM optimization passes wouldn't normally be able to do.
65 */
66struct lp_build_blend_aos_context
67{
68   struct lp_build_context base;
69
70   LLVMValueRef src;
71   LLVMValueRef src_alpha;
72   LLVMValueRef src1;
73   LLVMValueRef src1_alpha;
74   LLVMValueRef dst;
75   LLVMValueRef const_;
76   LLVMValueRef const_alpha;
77
78   LLVMValueRef inv_src;
79   LLVMValueRef inv_src_alpha;
80   LLVMValueRef inv_dst;
81   LLVMValueRef inv_const;
82   LLVMValueRef inv_const_alpha;
83   LLVMValueRef saturate;
84
85   LLVMValueRef rgb_src_factor;
86   LLVMValueRef alpha_src_factor;
87   LLVMValueRef rgb_dst_factor;
88   LLVMValueRef alpha_dst_factor;
89};
90
91
92static LLVMValueRef
93lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
94                                 unsigned factor,
95                                 boolean alpha)
96{
97   LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
98   LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1;
99   LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
100
101   switch (factor) {
102   case PIPE_BLENDFACTOR_ZERO:
103      return bld->base.zero;
104   case PIPE_BLENDFACTOR_ONE:
105      return bld->base.one;
106   case PIPE_BLENDFACTOR_SRC_COLOR:
107      return bld->src;
108   case PIPE_BLENDFACTOR_SRC_ALPHA:
109      return src_alpha;
110   case PIPE_BLENDFACTOR_DST_COLOR:
111   case PIPE_BLENDFACTOR_DST_ALPHA:
112      return bld->dst;
113   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
114      if(alpha)
115         return bld->base.one;
116      else {
117         /*
118          * if there's separate src_alpha there's no dst alpha hence the complement
119          * is zero but for unclamped float inputs min can be non-zero (negative).
120          */
121         if (bld->src_alpha) {
122            if (!bld->saturate)
123               bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
124         }
125         else {
126            if(!bld->inv_dst)
127               bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
128            if(!bld->saturate)
129               bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
130         }
131         return bld->saturate;
132      }
133   case PIPE_BLENDFACTOR_CONST_COLOR:
134      return bld->const_;
135   case PIPE_BLENDFACTOR_CONST_ALPHA:
136      return const_alpha;
137   case PIPE_BLENDFACTOR_SRC1_COLOR:
138      return bld->src1;
139   case PIPE_BLENDFACTOR_SRC1_ALPHA:
140      return src1_alpha;
141   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
142      if(!bld->inv_src)
143         bld->inv_src = lp_build_comp(&bld->base, bld->src);
144      return bld->inv_src;
145   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
146      if(!bld->inv_src_alpha)
147         bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
148      return bld->inv_src_alpha;
149   case PIPE_BLENDFACTOR_INV_DST_COLOR:
150   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
151      if(!bld->inv_dst)
152         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
153      return bld->inv_dst;
154   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
155      if(!bld->inv_const)
156         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
157      return bld->inv_const;
158   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
159      if(!bld->inv_const_alpha)
160         bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
161      return bld->inv_const_alpha;
162   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
163      return lp_build_comp(&bld->base, bld->src1);
164   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
165      return lp_build_comp(&bld->base, src1_alpha);
166   default:
167      assert(0);
168      return bld->base.zero;
169   }
170}
171
172
enum lp_build_blend_swizzle {
   /* 0: use the factor's channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA,
   /* 1: replicate the factor's alpha channel before use. */
   LP_BUILD_BLEND_SWIZZLE_AAAA
};
177
178
179/**
180 * How should we shuffle the base factor.
181 */
182static enum lp_build_blend_swizzle
183lp_build_blend_factor_swizzle(unsigned factor)
184{
185   switch (factor) {
186   case PIPE_BLENDFACTOR_ONE:
187   case PIPE_BLENDFACTOR_ZERO:
188   case PIPE_BLENDFACTOR_SRC_COLOR:
189   case PIPE_BLENDFACTOR_DST_COLOR:
190   case PIPE_BLENDFACTOR_CONST_COLOR:
191   case PIPE_BLENDFACTOR_SRC1_COLOR:
192   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
193   case PIPE_BLENDFACTOR_INV_DST_COLOR:
194   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
195   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
196      return LP_BUILD_BLEND_SWIZZLE_RGBA;
197   case PIPE_BLENDFACTOR_SRC_ALPHA:
198   case PIPE_BLENDFACTOR_DST_ALPHA:
199   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
200   case PIPE_BLENDFACTOR_SRC1_ALPHA:
201   case PIPE_BLENDFACTOR_CONST_ALPHA:
202   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
203   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
204   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
205   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
206      return LP_BUILD_BLEND_SWIZZLE_AAAA;
207   default:
208      assert(0);
209      return LP_BUILD_BLEND_SWIZZLE_RGBA;
210   }
211}
212
213
214static LLVMValueRef
215lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
216                       LLVMValueRef rgb,
217                       LLVMValueRef alpha,
218                       enum lp_build_blend_swizzle rgb_swizzle,
219                       unsigned alpha_swizzle,
220                       unsigned num_channels)
221{
222   LLVMValueRef swizzled_rgb;
223
224   switch (rgb_swizzle) {
225   case LP_BUILD_BLEND_SWIZZLE_RGBA:
226      swizzled_rgb = rgb;
227      break;
228   case LP_BUILD_BLEND_SWIZZLE_AAAA:
229      swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
230      break;
231   default:
232      assert(0);
233      swizzled_rgb = bld->base.undef;
234   }
235
236   if (rgb != alpha) {
237      swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
238                                         alpha, swizzled_rgb,
239                                         num_channels);
240   }
241
242   return swizzled_rgb;
243}
244
245/**
246 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
247 */
248static LLVMValueRef
249lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
250                      unsigned rgb_factor,
251                      unsigned alpha_factor,
252                      unsigned alpha_swizzle,
253                      unsigned num_channels)
254{
255   LLVMValueRef rgb_factor_, alpha_factor_;
256   enum lp_build_blend_swizzle rgb_swizzle;
257
258   if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) {
259      return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
260   }
261
262   rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
263
264   if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
265      rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
266      alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
267      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
268   } else {
269      return rgb_factor_;
270   }
271}
272
273
274/**
275 * Performs blending of src and dst pixels
276 *
277 * @param blend         the blend state of the shader variant
278 * @param cbuf_format   format of the colour buffer
279 * @param type          data type of the pixel vector
280 * @param rt            render target index
281 * @param src           blend src
282 * @param src_alpha     blend src alpha (if not included in src)
283 * @param src1          second blend src (for dual source blend)
284 * @param src1_alpha    second blend src alpha (if not included in src1)
285 * @param dst           blend dst
286 * @param mask          optional mask to apply to the blending result
287 * @param const_        const blend color
288 * @param const_alpha   const blend color alpha (if not included in const_)
289 * @param swizzle       swizzle values for RGBA
290 *
291 * @return the result of blending src and dst
292 */
293LLVMValueRef
294lp_build_blend_aos(struct gallivm_state *gallivm,
295                   const struct pipe_blend_state *blend,
296                   enum pipe_format cbuf_format,
297                   struct lp_type type,
298                   unsigned rt,
299                   LLVMValueRef src,
300                   LLVMValueRef src_alpha,
301                   LLVMValueRef src1,
302                   LLVMValueRef src1_alpha,
303                   LLVMValueRef dst,
304                   LLVMValueRef mask,
305                   LLVMValueRef const_,
306                   LLVMValueRef const_alpha,
307                   const unsigned char swizzle[4],
308                   int nr_channels)
309{
310   const struct pipe_rt_blend_state * state = &blend->rt[rt];
311   const struct util_format_description * desc;
312   struct lp_build_blend_aos_context bld;
313   LLVMValueRef src_factor, dst_factor;
314   LLVMValueRef result;
315   unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
316   unsigned i;
317
318   desc = util_format_description(cbuf_format);
319
320   /* Setup build context */
321   memset(&bld, 0, sizeof bld);
322   lp_build_context_init(&bld.base, gallivm, type);
323   bld.src = src;
324   bld.src1 = src1;
325   bld.dst = dst;
326   bld.const_ = const_;
327   bld.src_alpha = src_alpha;
328   bld.src1_alpha = src1_alpha;
329   bld.const_alpha = const_alpha;
330
331   /* Find the alpha channel if not provided seperately */
332   if (!src_alpha) {
333      for (i = 0; i < 4; ++i) {
334         if (swizzle[i] == 3) {
335            alpha_swizzle = i;
336         }
337      }
338   }
339
340   if (blend->logicop_enable) {
341      if(!type.floating) {
342         result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
343      }
344      else {
345         result = src;
346      }
347   } else if (!state->blend_enable) {
348      result = src;
349   } else {
350      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
351
352      src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
353                                         state->alpha_src_factor,
354                                         alpha_swizzle,
355                                         nr_channels);
356
357      dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
358                                         state->alpha_dst_factor,
359                                         alpha_swizzle,
360                                         nr_channels);
361
362      result = lp_build_blend(&bld.base,
363                              state->rgb_func,
364                              state->rgb_src_factor,
365                              state->rgb_dst_factor,
366                              src,
367                              dst,
368                              src_factor,
369                              dst_factor,
370                              rgb_alpha_same,
371                              false);
372
373      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
374         LLVMValueRef alpha;
375
376         alpha = lp_build_blend(&bld.base,
377                                state->alpha_func,
378                                state->alpha_src_factor,
379                                state->alpha_dst_factor,
380                                src,
381                                dst,
382                                src_factor,
383                                dst_factor,
384                                rgb_alpha_same,
385                                false);
386
387         result = lp_build_blend_swizzle(&bld,
388                                         result,
389                                         alpha,
390                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
391                                         alpha_swizzle,
392                                         nr_channels);
393      }
394   }
395
396   /* Check if color mask is necessary */
397   if (!util_format_colormask_full(desc, state->colormask)) {
398      LLVMValueRef color_mask;
399
400      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
401      lp_build_name(color_mask, "color_mask");
402
403      /* Combine with input mask if necessary */
404      if (mask) {
405         /* We can be blending floating values but masks are always integer... */
406         unsigned floating = bld.base.type.floating;
407         bld.base.type.floating = 0;
408
409         mask = lp_build_and(&bld.base, color_mask, mask);
410
411         bld.base.type.floating = floating;
412      } else {
413         mask = color_mask;
414      }
415   }
416
417   /* Apply mask, if one exists */
418   if (mask) {
419      result = lp_build_select(&bld.base, mask, result, dst);
420   }
421
422   return result;
423}
424