lp_bld_blend_aos.c revision 5811ed87d732101ab8cfbd087bc99d8c6c963f30
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46#include "pipe/p_state.h"
47
48#include "lp_bld_type.h"
49#include "lp_bld_const.h"
50#include "lp_bld_arit.h"
51#include "lp_bld_logic.h"
52#include "lp_bld_swizzle.h"
53#include "lp_bld_blend.h"
54#include "lp_bld_debug.h"
55
56
57/**
58 * We may the same values several times, so we keep them here to avoid
59 * recomputing them. Also reusing the values allows us to do simplifications
60 * that LLVM optimization passes wouldn't normally be able to do.
61 */
62struct lp_build_blend_aos_context
63{
64   struct lp_build_context base;
65
66   LLVMValueRef src;
67   LLVMValueRef dst;
68   LLVMValueRef const_;
69
70   LLVMValueRef inv_src;
71   LLVMValueRef inv_dst;
72   LLVMValueRef inv_const;
73   LLVMValueRef saturate;
74
75   LLVMValueRef rgb_src_factor;
76   LLVMValueRef alpha_src_factor;
77   LLVMValueRef rgb_dst_factor;
78   LLVMValueRef alpha_dst_factor;
79};
80
81
82static LLVMValueRef
83lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
84                                 unsigned factor,
85                                 boolean alpha)
86{
87   switch (factor) {
88   case PIPE_BLENDFACTOR_ZERO:
89      return bld->base.zero;
90   case PIPE_BLENDFACTOR_ONE:
91      return bld->base.one;
92   case PIPE_BLENDFACTOR_SRC_COLOR:
93   case PIPE_BLENDFACTOR_SRC_ALPHA:
94      return bld->src;
95   case PIPE_BLENDFACTOR_DST_COLOR:
96   case PIPE_BLENDFACTOR_DST_ALPHA:
97      return bld->dst;
98   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
99      if(alpha)
100         return bld->base.one;
101      else {
102         if(!bld->inv_dst)
103            bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
104         if(!bld->saturate)
105            bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
106         return bld->saturate;
107      }
108   case PIPE_BLENDFACTOR_CONST_COLOR:
109   case PIPE_BLENDFACTOR_CONST_ALPHA:
110      return bld->const_;
111   case PIPE_BLENDFACTOR_SRC1_COLOR:
112   case PIPE_BLENDFACTOR_SRC1_ALPHA:
113      /* TODO */
114      assert(0);
115      return bld->base.zero;
116   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
117   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
118      if(!bld->inv_src)
119         bld->inv_src = lp_build_comp(&bld->base, bld->src);
120      return bld->inv_src;
121   case PIPE_BLENDFACTOR_INV_DST_COLOR:
122   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
123      if(!bld->inv_dst)
124         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
125      return bld->inv_dst;
126   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
127   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
128      if(!bld->inv_const)
129         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
130      return bld->inv_const;
131   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
132   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
133      /* TODO */
134      assert(0);
135      return bld->base.zero;
136   default:
137      assert(0);
138      return bld->base.zero;
139   }
140}
141
142
/**
 * Channel arrangement to apply to an unswizzled blend factor.
 */
enum lp_build_blend_swizzle {
   /* Use the factor's channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
   /* Replicate the factor's alpha channel to every channel. */
   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
147
148
149/**
150 * How should we shuffle the base factor.
151 */
152static enum lp_build_blend_swizzle
153lp_build_blend_factor_swizzle(unsigned factor)
154{
155   switch (factor) {
156   case PIPE_BLENDFACTOR_ONE:
157   case PIPE_BLENDFACTOR_ZERO:
158   case PIPE_BLENDFACTOR_SRC_COLOR:
159   case PIPE_BLENDFACTOR_DST_COLOR:
160   case PIPE_BLENDFACTOR_CONST_COLOR:
161   case PIPE_BLENDFACTOR_SRC1_COLOR:
162   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
163   case PIPE_BLENDFACTOR_INV_DST_COLOR:
164   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
165   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
166      return LP_BUILD_BLEND_SWIZZLE_RGBA;
167   case PIPE_BLENDFACTOR_SRC_ALPHA:
168   case PIPE_BLENDFACTOR_DST_ALPHA:
169   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
170   case PIPE_BLENDFACTOR_SRC1_ALPHA:
171   case PIPE_BLENDFACTOR_CONST_ALPHA:
172   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
173   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
174   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
175   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
176      return LP_BUILD_BLEND_SWIZZLE_AAAA;
177   default:
178      assert(0);
179      return LP_BUILD_BLEND_SWIZZLE_RGBA;
180   }
181}
182
183
184static LLVMValueRef
185lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
186                       LLVMValueRef rgb,
187                       LLVMValueRef alpha,
188                       enum lp_build_blend_swizzle rgb_swizzle,
189                       unsigned alpha_swizzle)
190{
191   if(rgb == alpha) {
192      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
193         return rgb;
194      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
195         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
196   }
197   else {
198      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
199         boolean cond[4] = {0, 0, 0, 0};
200         cond[alpha_swizzle] = 1;
201         return lp_build_select_aos(&bld->base, alpha, rgb, cond);
202      }
203      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
204         unsigned char swizzle[4];
205         swizzle[0] = alpha_swizzle;
206         swizzle[1] = alpha_swizzle;
207         swizzle[2] = alpha_swizzle;
208         swizzle[3] = alpha_swizzle;
209         swizzle[alpha_swizzle] += 4;
210         return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
211      }
212   }
213   assert(0);
214   return bld->base.undef;
215}
216
217
218/**
219 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
220 */
221static LLVMValueRef
222lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
223                      LLVMValueRef factor1,
224                      unsigned rgb_factor,
225                      unsigned alpha_factor,
226                      unsigned alpha_swizzle)
227{
228   LLVMValueRef rgb_factor_;
229   LLVMValueRef alpha_factor_;
230   LLVMValueRef factor2;
231   enum lp_build_blend_swizzle rgb_swizzle;
232
233   rgb_factor_   = lp_build_blend_factor_unswizzled(bld, rgb_factor,   FALSE);
234   alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
235
236   rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
237
238   factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
239
240   return lp_build_mul(&bld->base, factor1, factor2);
241}
242
243
244boolean
245lp_build_blend_func_commutative(unsigned func)
246{
247   switch (func) {
248   case PIPE_BLEND_ADD:
249   case PIPE_BLEND_MIN:
250   case PIPE_BLEND_MAX:
251      return TRUE;
252   case PIPE_BLEND_SUBTRACT:
253   case PIPE_BLEND_REVERSE_SUBTRACT:
254      return FALSE;
255   default:
256      assert(0);
257      return TRUE;
258   }
259}
260
261
262boolean
263lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
264{
265   if(rgb_func == alpha_func)
266      return FALSE;
267   if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
268      return TRUE;
269   if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
270      return TRUE;
271   return FALSE;
272}
273
274
275/**
276 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
277 */
278LLVMValueRef
279lp_build_blend_func(struct lp_build_context *bld,
280                    unsigned func,
281                    LLVMValueRef term1,
282                    LLVMValueRef term2)
283{
284   switch (func) {
285   case PIPE_BLEND_ADD:
286      return lp_build_add(bld, term1, term2);
287      break;
288   case PIPE_BLEND_SUBTRACT:
289      return lp_build_sub(bld, term1, term2);
290   case PIPE_BLEND_REVERSE_SUBTRACT:
291      return lp_build_sub(bld, term2, term1);
292   case PIPE_BLEND_MIN:
293      return lp_build_min(bld, term1, term2);
294   case PIPE_BLEND_MAX:
295      return lp_build_max(bld, term1, term2);
296   default:
297      assert(0);
298      return bld->zero;
299   }
300}
301
302
303LLVMValueRef
304lp_build_blend_aos(LLVMBuilderRef builder,
305                   const struct pipe_blend_state *blend,
306                   union lp_type type,
307                   LLVMValueRef src,
308                   LLVMValueRef dst,
309                   LLVMValueRef const_,
310                   unsigned alpha_swizzle)
311{
312   struct lp_build_blend_aos_context bld;
313   LLVMValueRef src_term;
314   LLVMValueRef dst_term;
315
316   /* FIXME */
317   assert(blend->colormask == 0xf);
318
319   if(!blend->blend_enable)
320      return src;
321
322   /* It makes no sense to blend unless values are normalized */
323   assert(type.norm);
324
325   /* Setup build context */
326   memset(&bld, 0, sizeof bld);
327   lp_build_context_init(&bld.base, builder, type);
328   bld.src = src;
329   bld.dst = dst;
330   bld.const_ = const_;
331
332   /* TODO: There are still a few optimization opportunities here. For certain
333    * combinations it is possible to reorder the operations and therefore saving
334    * some instructions. */
335
336   src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
337   dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
338
339   lp_build_name(src_term, "src_term");
340   lp_build_name(dst_term, "dst_term");
341
342   if(blend->rgb_func == blend->alpha_func) {
343      return lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term);
344   }
345   else {
346      /* Seperate RGB / A functions */
347
348      LLVMValueRef rgb;
349      LLVMValueRef alpha;
350
351      rgb   = lp_build_blend_func(&bld.base, blend->rgb_func,   src_term, dst_term);
352      alpha = lp_build_blend_func(&bld.base, blend->alpha_func, src_term, dst_term);
353
354      return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
355   }
356}
357