lp_bld_blend_aos.c revision c23fd547c060c4137eab0f878a1028c5903384eb
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46#include "pipe/p_state.h"
47#include "util/u_debug.h"
48#include "util/u_format.h"
49
50#include "gallivm/lp_bld_type.h"
51#include "gallivm/lp_bld_const.h"
52#include "gallivm/lp_bld_arit.h"
53#include "gallivm/lp_bld_logic.h"
54#include "gallivm/lp_bld_swizzle.h"
55#include "gallivm/lp_bld_bitarit.h"
56#include "gallivm/lp_bld_debug.h"
57
58#include "lp_bld_blend.h"
59
60
61/**
62 * We may the same values several times, so we keep them here to avoid
63 * recomputing them. Also reusing the values allows us to do simplifications
64 * that LLVM optimization passes wouldn't normally be able to do.
65 */
66struct lp_build_blend_aos_context
67{
68   struct lp_build_context base;
69
70   LLVMValueRef src;
71   LLVMValueRef dst;
72   LLVMValueRef const_;
73
74   LLVMValueRef inv_src;
75   LLVMValueRef inv_dst;
76   LLVMValueRef inv_const;
77   LLVMValueRef saturate;
78
79   LLVMValueRef rgb_src_factor;
80   LLVMValueRef alpha_src_factor;
81   LLVMValueRef rgb_dst_factor;
82   LLVMValueRef alpha_dst_factor;
83};
84
85
86static LLVMValueRef
87lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
88                                 unsigned factor,
89                                 boolean alpha)
90{
91   switch (factor) {
92   case PIPE_BLENDFACTOR_ZERO:
93      return bld->base.zero;
94   case PIPE_BLENDFACTOR_ONE:
95      return bld->base.one;
96   case PIPE_BLENDFACTOR_SRC_COLOR:
97   case PIPE_BLENDFACTOR_SRC_ALPHA:
98      return bld->src;
99   case PIPE_BLENDFACTOR_DST_COLOR:
100   case PIPE_BLENDFACTOR_DST_ALPHA:
101      return bld->dst;
102   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
103      if(alpha)
104         return bld->base.one;
105      else {
106         if(!bld->inv_dst)
107            bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
108         if(!bld->saturate)
109            bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
110         return bld->saturate;
111      }
112   case PIPE_BLENDFACTOR_CONST_COLOR:
113   case PIPE_BLENDFACTOR_CONST_ALPHA:
114      return bld->const_;
115   case PIPE_BLENDFACTOR_SRC1_COLOR:
116   case PIPE_BLENDFACTOR_SRC1_ALPHA:
117      /* TODO */
118      assert(0);
119      return bld->base.zero;
120   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
121   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
122      if(!bld->inv_src)
123         bld->inv_src = lp_build_comp(&bld->base, bld->src);
124      return bld->inv_src;
125   case PIPE_BLENDFACTOR_INV_DST_COLOR:
126   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
127      if(!bld->inv_dst)
128         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
129      return bld->inv_dst;
130   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
131   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
132      if(!bld->inv_const)
133         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
134      return bld->inv_const;
135   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
136   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
137      /* TODO */
138      assert(0);
139      return bld->base.zero;
140   default:
141      assert(0);
142      return bld->base.zero;
143   }
144}
145
146
/**
 * How the RGB part of a blend factor is arranged before it is combined with
 * the alpha part (see lp_build_blend_swizzle()).
 */
enum lp_build_blend_swizzle {
   /* Value 0: use the factor's channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
   /* Value 1: run the factor through lp_build_swizzle_scalar_aos() with the
    * alpha channel index. */
   LP_BUILD_BLEND_SWIZZLE_AAAA
};
151
152
153/**
154 * How should we shuffle the base factor.
155 */
156static enum lp_build_blend_swizzle
157lp_build_blend_factor_swizzle(unsigned factor)
158{
159   switch (factor) {
160   case PIPE_BLENDFACTOR_ONE:
161   case PIPE_BLENDFACTOR_ZERO:
162   case PIPE_BLENDFACTOR_SRC_COLOR:
163   case PIPE_BLENDFACTOR_DST_COLOR:
164   case PIPE_BLENDFACTOR_CONST_COLOR:
165   case PIPE_BLENDFACTOR_SRC1_COLOR:
166   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
167   case PIPE_BLENDFACTOR_INV_DST_COLOR:
168   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
169   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
170      return LP_BUILD_BLEND_SWIZZLE_RGBA;
171   case PIPE_BLENDFACTOR_SRC_ALPHA:
172   case PIPE_BLENDFACTOR_DST_ALPHA:
173   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
174   case PIPE_BLENDFACTOR_SRC1_ALPHA:
175   case PIPE_BLENDFACTOR_CONST_ALPHA:
176   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
177   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
178   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
179   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
180      return LP_BUILD_BLEND_SWIZZLE_AAAA;
181   default:
182      assert(0);
183      return LP_BUILD_BLEND_SWIZZLE_RGBA;
184   }
185}
186
187
188static LLVMValueRef
189lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
190                       LLVMValueRef rgb,
191                       LLVMValueRef alpha,
192                       enum lp_build_blend_swizzle rgb_swizzle,
193                       unsigned alpha_swizzle)
194{
195   LLVMValueRef swizzled_rgb;
196
197   switch (rgb_swizzle) {
198   case LP_BUILD_BLEND_SWIZZLE_RGBA:
199      swizzled_rgb = rgb;
200      break;
201   case LP_BUILD_BLEND_SWIZZLE_AAAA:
202      swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle);
203      break;
204   default:
205      assert(0);
206      swizzled_rgb = bld->base.undef;
207   }
208
209   if (rgb != alpha) {
210      swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
211                                         alpha, swizzled_rgb);
212   }
213
214   return swizzled_rgb;
215}
216
217
218/**
219 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
220 */
221static LLVMValueRef
222lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
223                      LLVMValueRef factor1,
224                      unsigned rgb_factor,
225                      unsigned alpha_factor,
226                      unsigned alpha_swizzle)
227{
228   LLVMValueRef rgb_factor_;
229   LLVMValueRef alpha_factor_;
230   LLVMValueRef factor2;
231   enum lp_build_blend_swizzle rgb_swizzle;
232
233   rgb_factor_   = lp_build_blend_factor_unswizzled(bld, rgb_factor,   FALSE);
234   alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
235
236   rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
237
238   factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
239
240   return lp_build_mul(&bld->base, factor1, factor2);
241}
242
243
244/**
245 * Is (a OP b) == (b OP a)?
246 */
247boolean
248lp_build_blend_func_commutative(unsigned func)
249{
250   switch (func) {
251   case PIPE_BLEND_ADD:
252   case PIPE_BLEND_MIN:
253   case PIPE_BLEND_MAX:
254      return TRUE;
255   case PIPE_BLEND_SUBTRACT:
256   case PIPE_BLEND_REVERSE_SUBTRACT:
257      return FALSE;
258   default:
259      assert(0);
260      return TRUE;
261   }
262}
263
264
265boolean
266lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
267{
268   if(rgb_func == alpha_func)
269      return FALSE;
270   if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
271      return TRUE;
272   if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
273      return TRUE;
274   return FALSE;
275}
276
277
278/**
279 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
280 */
281LLVMValueRef
282lp_build_blend_func(struct lp_build_context *bld,
283                    unsigned func,
284                    LLVMValueRef term1,
285                    LLVMValueRef term2)
286{
287   switch (func) {
288   case PIPE_BLEND_ADD:
289      return lp_build_add(bld, term1, term2);
290   case PIPE_BLEND_SUBTRACT:
291      return lp_build_sub(bld, term1, term2);
292   case PIPE_BLEND_REVERSE_SUBTRACT:
293      return lp_build_sub(bld, term2, term1);
294   case PIPE_BLEND_MIN:
295      return lp_build_min(bld, term1, term2);
296   case PIPE_BLEND_MAX:
297      return lp_build_max(bld, term1, term2);
298   default:
299      assert(0);
300      return bld->zero;
301   }
302}
303
304
305/**
306 * Performs blending of src and dst pixels
307 *
308 * @param blend         the blend state of the shader variant
309 * @param cbuf_format   format of the colour buffer
310 * @param type          data type of the pixel vector
311 * @param rt            rt number
312 * @param src           blend src
313 * @param dst           blend dst
314 * @param mask          optional mask to apply to the blending result
315 * @param const_        const blend color
316 * @param swizzle       swizzle values for RGBA
317 *
318 * @return the result of blending src and dst
319 */
320LLVMValueRef
321lp_build_blend_aos(struct gallivm_state *gallivm,
322                   const struct pipe_blend_state *blend,
323                   const enum pipe_format *cbuf_format,
324                   struct lp_type type,
325                   unsigned rt,
326                   LLVMValueRef src,
327                   LLVMValueRef dst,
328                   LLVMValueRef mask,
329                   LLVMValueRef const_,
330                   const unsigned char swizzle[4])
331{
332   struct lp_build_blend_aos_context bld;
333   LLVMValueRef src_term;
334   LLVMValueRef dst_term;
335   LLVMValueRef result;
336   unsigned alpha_swizzle = swizzle[3];
337   boolean fullcolormask;
338
339   /* Setup build context */
340   memset(&bld, 0, sizeof bld);
341   lp_build_context_init(&bld.base, gallivm, type);
342   bld.src = src;
343   bld.dst = dst;
344   bld.const_ = const_;
345
346   if (!blend->rt[rt].blend_enable) {
347      result = src;
348   } else {
349
350      /* TODO: There are still a few optimization opportunities here. For certain
351       * combinations it is possible to reorder the operations and therefore saving
352       * some instructions. */
353
354      src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor,
355                                       blend->rt[rt].alpha_src_factor, alpha_swizzle);
356      dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor,
357                                       blend->rt[rt].alpha_dst_factor, alpha_swizzle);
358
359      lp_build_name(src_term, "src_term");
360      lp_build_name(dst_term, "dst_term");
361
362      if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) {
363         result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term);
364      }
365      else {
366         /* Seperate RGB / A functions */
367
368         LLVMValueRef rgb;
369         LLVMValueRef alpha;
370
371         rgb   = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func,   src_term, dst_term);
372         alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term);
373
374         result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
375      }
376   }
377
378   /* Check if color mask is necessary */
379   fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask);
380
381   if (!fullcolormask) {
382      LLVMValueRef color_mask;
383
384      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, blend->rt[rt].colormask, swizzle);
385      lp_build_name(color_mask, "color_mask");
386
387      /* Combine with input mask if necessary */
388      if (mask) {
389         mask = lp_build_and(&bld.base, color_mask, mask);
390      } else {
391         mask = color_mask;
392      }
393   }
394
395   /* Apply mask, if one exists */
396   if (mask) {
397      result = lp_build_select(&bld.base, mask, result, dst);
398   }
399
400   return result;
401}
402