lp_bld_blend_aos.c revision 38f6f23fcf37247fd709d1c612d08bfa9b124e69
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
/**
 * @file
 * Blend LLVM IR generation -- AoS layout.
 *
 * AoS blending is in general much slower than SoA, but there are some cases
 * where it might be faster. In particular, if a pixel is rendered only once
 * then the overhead of tiling and untiling will dominate over the speedup that
 * SoA gives. So we might want to detect such cases and fall back to AoS in the
 * future, but for now this function is here for historical/benchmarking
 * purposes.
 *
 * Run lp_blend_test after any change to this file.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
44
45
46#include "pipe/p_state.h"
47#include "util/u_debug.h"
48
49#include "lp_bld_type.h"
50#include "lp_bld_const.h"
51#include "lp_bld_arit.h"
52#include "lp_bld_logic.h"
53#include "lp_bld_swizzle.h"
54#include "lp_bld_blend.h"
55#include "lp_bld_debug.h"
56
57
58/**
59 * We may the same values several times, so we keep them here to avoid
60 * recomputing them. Also reusing the values allows us to do simplifications
61 * that LLVM optimization passes wouldn't normally be able to do.
62 */
63struct lp_build_blend_aos_context
64{
65   struct lp_build_context base;
66
67   LLVMValueRef src;
68   LLVMValueRef dst;
69   LLVMValueRef const_;
70
71   LLVMValueRef inv_src;
72   LLVMValueRef inv_dst;
73   LLVMValueRef inv_const;
74   LLVMValueRef saturate;
75
76   LLVMValueRef rgb_src_factor;
77   LLVMValueRef alpha_src_factor;
78   LLVMValueRef rgb_dst_factor;
79   LLVMValueRef alpha_dst_factor;
80};
81
82
83static LLVMValueRef
84lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
85                                 unsigned factor,
86                                 boolean alpha)
87{
88   switch (factor) {
89   case PIPE_BLENDFACTOR_ZERO:
90      return bld->base.zero;
91   case PIPE_BLENDFACTOR_ONE:
92      return bld->base.one;
93   case PIPE_BLENDFACTOR_SRC_COLOR:
94   case PIPE_BLENDFACTOR_SRC_ALPHA:
95      return bld->src;
96   case PIPE_BLENDFACTOR_DST_COLOR:
97   case PIPE_BLENDFACTOR_DST_ALPHA:
98      return bld->dst;
99   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
100      if(alpha)
101         return bld->base.one;
102      else {
103         if(!bld->inv_dst)
104            bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
105         if(!bld->saturate)
106            bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
107         return bld->saturate;
108      }
109   case PIPE_BLENDFACTOR_CONST_COLOR:
110   case PIPE_BLENDFACTOR_CONST_ALPHA:
111      return bld->const_;
112   case PIPE_BLENDFACTOR_SRC1_COLOR:
113   case PIPE_BLENDFACTOR_SRC1_ALPHA:
114      /* TODO */
115      assert(0);
116      return bld->base.zero;
117   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
118   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
119      if(!bld->inv_src)
120         bld->inv_src = lp_build_comp(&bld->base, bld->src);
121      return bld->inv_src;
122   case PIPE_BLENDFACTOR_INV_DST_COLOR:
123   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
124      if(!bld->inv_dst)
125         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
126      return bld->inv_dst;
127   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
128   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
129      if(!bld->inv_const)
130         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
131      return bld->inv_const;
132   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
133   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
134      /* TODO */
135      assert(0);
136      return bld->base.zero;
137   default:
138      assert(0);
139      return bld->base.zero;
140   }
141}
142
143
/**
 * Channel arrangements that can be applied to an unswizzled blend factor.
 */
enum lp_build_blend_swizzle {
   /** Keep every channel where it is. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,

   /** Replicate the alpha channel into all channels. */
   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
148
149
150/**
151 * How should we shuffle the base factor.
152 */
153static enum lp_build_blend_swizzle
154lp_build_blend_factor_swizzle(unsigned factor)
155{
156   switch (factor) {
157   case PIPE_BLENDFACTOR_ONE:
158   case PIPE_BLENDFACTOR_ZERO:
159   case PIPE_BLENDFACTOR_SRC_COLOR:
160   case PIPE_BLENDFACTOR_DST_COLOR:
161   case PIPE_BLENDFACTOR_CONST_COLOR:
162   case PIPE_BLENDFACTOR_SRC1_COLOR:
163   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
164   case PIPE_BLENDFACTOR_INV_DST_COLOR:
165   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
166   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
167      return LP_BUILD_BLEND_SWIZZLE_RGBA;
168   case PIPE_BLENDFACTOR_SRC_ALPHA:
169   case PIPE_BLENDFACTOR_DST_ALPHA:
170   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
171   case PIPE_BLENDFACTOR_SRC1_ALPHA:
172   case PIPE_BLENDFACTOR_CONST_ALPHA:
173   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
174   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
175   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
176   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
177      return LP_BUILD_BLEND_SWIZZLE_AAAA;
178   default:
179      assert(0);
180      return LP_BUILD_BLEND_SWIZZLE_RGBA;
181   }
182}
183
184
185static LLVMValueRef
186lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
187                       LLVMValueRef rgb,
188                       LLVMValueRef alpha,
189                       enum lp_build_blend_swizzle rgb_swizzle,
190                       unsigned alpha_swizzle)
191{
192   if(rgb == alpha) {
193      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
194         return rgb;
195      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
196         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
197   }
198   else {
199      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
200         boolean cond[4] = {0, 0, 0, 0};
201         cond[alpha_swizzle] = 1;
202         return lp_build_select_aos(&bld->base, alpha, rgb, cond);
203      }
204      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
205         unsigned char swizzle[4];
206         swizzle[0] = alpha_swizzle;
207         swizzle[1] = alpha_swizzle;
208         swizzle[2] = alpha_swizzle;
209         swizzle[3] = alpha_swizzle;
210         swizzle[alpha_swizzle] += 4;
211         return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
212      }
213   }
214   assert(0);
215   return bld->base.undef;
216}
217
218
219/**
220 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
221 */
222static LLVMValueRef
223lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
224                      LLVMValueRef factor1,
225                      unsigned rgb_factor,
226                      unsigned alpha_factor,
227                      unsigned alpha_swizzle)
228{
229   LLVMValueRef rgb_factor_;
230   LLVMValueRef alpha_factor_;
231   LLVMValueRef factor2;
232   enum lp_build_blend_swizzle rgb_swizzle;
233
234   rgb_factor_   = lp_build_blend_factor_unswizzled(bld, rgb_factor,   FALSE);
235   alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
236
237   rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
238
239   factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
240
241   return lp_build_mul(&bld->base, factor1, factor2);
242}
243
244
245boolean
246lp_build_blend_func_commutative(unsigned func)
247{
248   switch (func) {
249   case PIPE_BLEND_ADD:
250   case PIPE_BLEND_MIN:
251   case PIPE_BLEND_MAX:
252      return TRUE;
253   case PIPE_BLEND_SUBTRACT:
254   case PIPE_BLEND_REVERSE_SUBTRACT:
255      return FALSE;
256   default:
257      assert(0);
258      return TRUE;
259   }
260}
261
262
263boolean
264lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
265{
266   if(rgb_func == alpha_func)
267      return FALSE;
268   if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
269      return TRUE;
270   if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
271      return TRUE;
272   return FALSE;
273}
274
275
276/**
277 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
278 */
279LLVMValueRef
280lp_build_blend_func(struct lp_build_context *bld,
281                    unsigned func,
282                    LLVMValueRef term1,
283                    LLVMValueRef term2)
284{
285   switch (func) {
286   case PIPE_BLEND_ADD:
287      return lp_build_add(bld, term1, term2);
288      break;
289   case PIPE_BLEND_SUBTRACT:
290      return lp_build_sub(bld, term1, term2);
291   case PIPE_BLEND_REVERSE_SUBTRACT:
292      return lp_build_sub(bld, term2, term1);
293   case PIPE_BLEND_MIN:
294      return lp_build_min(bld, term1, term2);
295   case PIPE_BLEND_MAX:
296      return lp_build_max(bld, term1, term2);
297   default:
298      assert(0);
299      return bld->zero;
300   }
301}
302
303
304LLVMValueRef
305lp_build_blend_aos(LLVMBuilderRef builder,
306                   const struct pipe_blend_state *blend,
307                   struct lp_type type,
308                   LLVMValueRef src,
309                   LLVMValueRef dst,
310                   LLVMValueRef const_,
311                   unsigned alpha_swizzle)
312{
313   struct lp_build_blend_aos_context bld;
314   LLVMValueRef src_term;
315   LLVMValueRef dst_term;
316
317   /* FIXME */
318   assert(blend->independent_blend_enable == 0);
319   assert(blend->rt[0].colormask == 0xf);
320
321   if(!blend->rt[0].blend_enable)
322      return src;
323
324   /* It makes no sense to blend unless values are normalized */
325   assert(type.norm);
326
327   /* Setup build context */
328   memset(&bld, 0, sizeof bld);
329   lp_build_context_init(&bld.base, builder, type);
330   bld.src = src;
331   bld.dst = dst;
332   bld.const_ = const_;
333
334   /* TODO: There are still a few optimization opportunities here. For certain
335    * combinations it is possible to reorder the operations and therefore saving
336    * some instructions. */
337
338   src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor,
339                                    blend->rt[0].alpha_src_factor, alpha_swizzle);
340   dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor,
341                                    blend->rt[0].alpha_dst_factor, alpha_swizzle);
342
343   lp_build_name(src_term, "src_term");
344   lp_build_name(dst_term, "dst_term");
345
346   if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) {
347      return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term);
348   }
349   else {
350      /* Seperate RGB / A functions */
351
352      LLVMValueRef rgb;
353      LLVMValueRef alpha;
354
355      rgb   = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func,   src_term, dst_term);
356      alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term);
357
358      return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
359   }
360}
361