lp_bld_blend_aos.c revision bd34f61a7a84b039b1c9d4aafbdb8f7656c8dd11
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Blend LLVM IR generation -- AoS layout. 32 * 33 * AoS blending is in general much slower than SoA, but there are some cases 34 * where it might be faster. In particular, if a pixel is rendered only once 35 * then the overhead of tiling and untiling will dominate over the speedup that 36 * SoA gives. So we might want to detect such cases and fallback to AoS in the 37 * future, but for now this function is here for historical/benchmarking 38 * purposes. 39 * 40 * Run lp_blend_test after any change to this file. 41 * 42 * @author Jose Fonseca <jfonseca@vmware.com> 43 */ 44 45 46#include "pipe/p_state.h" 47#include "util/u_debug.h" 48 49#include "gallivm/lp_bld_type.h" 50#include "gallivm/lp_bld_const.h" 51#include "gallivm/lp_bld_arit.h" 52#include "gallivm/lp_bld_logic.h" 53#include "gallivm/lp_bld_swizzle.h" 54#include "gallivm/lp_bld_debug.h" 55 56#include "lp_bld_blend.h" 57 58 59/** 60 * We may the same values several times, so we keep them here to avoid 61 * recomputing them. Also reusing the values allows us to do simplifications 62 * that LLVM optimization passes wouldn't normally be able to do. 63 */ 64struct lp_build_blend_aos_context 65{ 66 struct lp_build_context base; 67 68 LLVMValueRef src; 69 LLVMValueRef dst; 70 LLVMValueRef const_; 71 72 LLVMValueRef inv_src; 73 LLVMValueRef inv_dst; 74 LLVMValueRef inv_const; 75 LLVMValueRef saturate; 76 77 LLVMValueRef rgb_src_factor; 78 LLVMValueRef alpha_src_factor; 79 LLVMValueRef rgb_dst_factor; 80 LLVMValueRef alpha_dst_factor; 81}; 82 83 84static LLVMValueRef 85lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, 86 unsigned factor, 87 boolean alpha) 88{ 89 switch (factor) { 90 case PIPE_BLENDFACTOR_ZERO: 91 return bld->base.zero; 92 case PIPE_BLENDFACTOR_ONE: 93 return bld->base.one; 94 case PIPE_BLENDFACTOR_SRC_COLOR: 95 case PIPE_BLENDFACTOR_SRC_ALPHA: 96 return bld->src; 97 case PIPE_BLENDFACTOR_DST_COLOR: 98 case PIPE_BLENDFACTOR_DST_ALPHA: 99 return bld->dst; 100 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 101 if(alpha) 102 return bld->base.one; 103 else { 104 if(!bld->inv_dst) 105 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 106 if(!bld->saturate) 107 bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); 108 return bld->saturate; 109 } 110 case PIPE_BLENDFACTOR_CONST_COLOR: 111 case PIPE_BLENDFACTOR_CONST_ALPHA: 112 return bld->const_; 113 case PIPE_BLENDFACTOR_SRC1_COLOR: 114 case PIPE_BLENDFACTOR_SRC1_ALPHA: 115 /* TODO */ 116 assert(0); 117 return bld->base.zero; 118 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 119 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 120 if(!bld->inv_src) 121 bld->inv_src = lp_build_comp(&bld->base, bld->src); 122 return bld->inv_src; 123 case PIPE_BLENDFACTOR_INV_DST_COLOR: 124 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 125 if(!bld->inv_dst) 126 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 127 return bld->inv_dst; 128 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 129 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 130 if(!bld->inv_const) 131 bld->inv_const = lp_build_comp(&bld->base, bld->const_); 132 return bld->inv_const; 133 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 134 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 135 /* TODO */ 136 assert(0); 137 return bld->base.zero; 138 default: 139 assert(0); 140 return bld->base.zero; 141 } 142} 143 144 145enum lp_build_blend_swizzle { 146 LP_BUILD_BLEND_SWIZZLE_RGBA = 0, 147 LP_BUILD_BLEND_SWIZZLE_AAAA = 1 148}; 149 150 151/** 152 * How should we shuffle the base factor. 153 */ 154static enum lp_build_blend_swizzle 155lp_build_blend_factor_swizzle(unsigned factor) 156{ 157 switch (factor) { 158 case PIPE_BLENDFACTOR_ONE: 159 case PIPE_BLENDFACTOR_ZERO: 160 case PIPE_BLENDFACTOR_SRC_COLOR: 161 case PIPE_BLENDFACTOR_DST_COLOR: 162 case PIPE_BLENDFACTOR_CONST_COLOR: 163 case PIPE_BLENDFACTOR_SRC1_COLOR: 164 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 165 case PIPE_BLENDFACTOR_INV_DST_COLOR: 166 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 167 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 168 return LP_BUILD_BLEND_SWIZZLE_RGBA; 169 case PIPE_BLENDFACTOR_SRC_ALPHA: 170 case PIPE_BLENDFACTOR_DST_ALPHA: 171 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 172 case PIPE_BLENDFACTOR_SRC1_ALPHA: 173 case PIPE_BLENDFACTOR_CONST_ALPHA: 174 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 175 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 176 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 177 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 178 return LP_BUILD_BLEND_SWIZZLE_AAAA; 179 default: 180 assert(0); 181 return LP_BUILD_BLEND_SWIZZLE_RGBA; 182 } 183} 184 185 186static LLVMValueRef 187lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, 188 LLVMValueRef rgb, 189 LLVMValueRef alpha, 190 enum lp_build_blend_swizzle rgb_swizzle, 191 unsigned alpha_swizzle) 192{ 193 if(rgb == alpha) { 194 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) 195 return rgb; 196 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) 197 return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); 198 } 199 else { 200 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { 201 boolean cond[4] = {0, 0, 0, 0}; 202 cond[alpha_swizzle] = 1; 203 return lp_build_select_aos(&bld->base, alpha, rgb, cond); 204 } 205 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { 206 unsigned char swizzle[4]; 207 swizzle[0] = alpha_swizzle; 208 swizzle[1] = alpha_swizzle; 209 swizzle[2] = alpha_swizzle; 210 swizzle[3] = alpha_swizzle; 211 swizzle[alpha_swizzle] += 4; 212 return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); 213 } 214 } 215 assert(0); 216 return bld->base.undef; 217} 218 219 220/** 221 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml 222 */ 223static LLVMValueRef 224lp_build_blend_factor(struct lp_build_blend_aos_context *bld, 225 LLVMValueRef factor1, 226 unsigned rgb_factor, 227 unsigned alpha_factor, 228 unsigned alpha_swizzle) 229{ 230 LLVMValueRef rgb_factor_; 231 LLVMValueRef alpha_factor_; 232 LLVMValueRef factor2; 233 enum lp_build_blend_swizzle rgb_swizzle; 234 235 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); 236 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 237 238 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); 239 240 factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); 241 242 return lp_build_mul(&bld->base, factor1, factor2); 243} 244 245 246/** 247 * Is (a OP b) == (b OP a)? 248 */ 249boolean 250lp_build_blend_func_commutative(unsigned func) 251{ 252 switch (func) { 253 case PIPE_BLEND_ADD: 254 case PIPE_BLEND_MIN: 255 case PIPE_BLEND_MAX: 256 return TRUE; 257 case PIPE_BLEND_SUBTRACT: 258 case PIPE_BLEND_REVERSE_SUBTRACT: 259 return FALSE; 260 default: 261 assert(0); 262 return TRUE; 263 } 264} 265 266 267boolean 268lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) 269{ 270 if(rgb_func == alpha_func) 271 return FALSE; 272 if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) 273 return TRUE; 274 if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) 275 return TRUE; 276 return FALSE; 277} 278 279 280/** 281 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml 282 */ 283LLVMValueRef 284lp_build_blend_func(struct lp_build_context *bld, 285 unsigned func, 286 LLVMValueRef term1, 287 LLVMValueRef term2) 288{ 289 switch (func) { 290 case PIPE_BLEND_ADD: 291 return lp_build_add(bld, term1, term2); 292 case PIPE_BLEND_SUBTRACT: 293 return lp_build_sub(bld, term1, term2); 294 case PIPE_BLEND_REVERSE_SUBTRACT: 295 return lp_build_sub(bld, term2, term1); 296 case PIPE_BLEND_MIN: 297 return lp_build_min(bld, term1, term2); 298 case PIPE_BLEND_MAX: 299 return lp_build_max(bld, term1, term2); 300 default: 301 assert(0); 302 return bld->zero; 303 } 304} 305 306 307LLVMValueRef 308lp_build_blend_aos(LLVMBuilderRef builder, 309 const struct pipe_blend_state *blend, 310 struct lp_type type, 311 LLVMValueRef src, 312 LLVMValueRef dst, 313 LLVMValueRef const_, 314 unsigned alpha_swizzle) 315{ 316 struct lp_build_blend_aos_context bld; 317 LLVMValueRef src_term; 318 LLVMValueRef dst_term; 319 320 /* FIXME */ 321 assert(blend->independent_blend_enable == 0); 322 assert(blend->rt[0].colormask == 0xf); 323 324 if(!blend->rt[0].blend_enable) 325 return src; 326 327 /* It makes no sense to blend unless values are normalized */ 328 assert(type.norm); 329 330 /* Setup build context */ 331 memset(&bld, 0, sizeof bld); 332 lp_build_context_init(&bld.base, builder, type); 333 bld.src = src; 334 bld.dst = dst; 335 bld.const_ = const_; 336 337 /* TODO: There are still a few optimization opportunities here. For certain 338 * combinations it is possible to reorder the operations and therefore saving 339 * some instructions. */ 340 341 src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor, 342 blend->rt[0].alpha_src_factor, alpha_swizzle); 343 dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor, 344 blend->rt[0].alpha_dst_factor, alpha_swizzle); 345 346 lp_build_name(src_term, "src_term"); 347 lp_build_name(dst_term, "dst_term"); 348 349 if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) { 350 return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); 351 } 352 else { 353 /* Seperate RGB / A functions */ 354 355 LLVMValueRef rgb; 356 LLVMValueRef alpha; 357 358 rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); 359 alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term); 360 361 return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); 362 } 363} 364