/* lp_bld_blend_aos.c revision c23fd547c060c4137eab0f878a1028c5903384eb */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Blend LLVM IR generation -- AoS layout. 32 * 33 * AoS blending is in general much slower than SoA, but there are some cases 34 * where it might be faster. In particular, if a pixel is rendered only once 35 * then the overhead of tiling and untiling will dominate over the speedup that 36 * SoA gives. So we might want to detect such cases and fallback to AoS in the 37 * future, but for now this function is here for historical/benchmarking 38 * purposes. 39 * 40 * Run lp_blend_test after any change to this file. 
41 * 42 * @author Jose Fonseca <jfonseca@vmware.com> 43 */ 44 45 46#include "pipe/p_state.h" 47#include "util/u_debug.h" 48#include "util/u_format.h" 49 50#include "gallivm/lp_bld_type.h" 51#include "gallivm/lp_bld_const.h" 52#include "gallivm/lp_bld_arit.h" 53#include "gallivm/lp_bld_logic.h" 54#include "gallivm/lp_bld_swizzle.h" 55#include "gallivm/lp_bld_bitarit.h" 56#include "gallivm/lp_bld_debug.h" 57 58#include "lp_bld_blend.h" 59 60 61/** 62 * We may the same values several times, so we keep them here to avoid 63 * recomputing them. Also reusing the values allows us to do simplifications 64 * that LLVM optimization passes wouldn't normally be able to do. 65 */ 66struct lp_build_blend_aos_context 67{ 68 struct lp_build_context base; 69 70 LLVMValueRef src; 71 LLVMValueRef dst; 72 LLVMValueRef const_; 73 74 LLVMValueRef inv_src; 75 LLVMValueRef inv_dst; 76 LLVMValueRef inv_const; 77 LLVMValueRef saturate; 78 79 LLVMValueRef rgb_src_factor; 80 LLVMValueRef alpha_src_factor; 81 LLVMValueRef rgb_dst_factor; 82 LLVMValueRef alpha_dst_factor; 83}; 84 85 86static LLVMValueRef 87lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, 88 unsigned factor, 89 boolean alpha) 90{ 91 switch (factor) { 92 case PIPE_BLENDFACTOR_ZERO: 93 return bld->base.zero; 94 case PIPE_BLENDFACTOR_ONE: 95 return bld->base.one; 96 case PIPE_BLENDFACTOR_SRC_COLOR: 97 case PIPE_BLENDFACTOR_SRC_ALPHA: 98 return bld->src; 99 case PIPE_BLENDFACTOR_DST_COLOR: 100 case PIPE_BLENDFACTOR_DST_ALPHA: 101 return bld->dst; 102 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 103 if(alpha) 104 return bld->base.one; 105 else { 106 if(!bld->inv_dst) 107 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 108 if(!bld->saturate) 109 bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); 110 return bld->saturate; 111 } 112 case PIPE_BLENDFACTOR_CONST_COLOR: 113 case PIPE_BLENDFACTOR_CONST_ALPHA: 114 return bld->const_; 115 case PIPE_BLENDFACTOR_SRC1_COLOR: 116 case 
PIPE_BLENDFACTOR_SRC1_ALPHA: 117 /* TODO */ 118 assert(0); 119 return bld->base.zero; 120 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 121 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 122 if(!bld->inv_src) 123 bld->inv_src = lp_build_comp(&bld->base, bld->src); 124 return bld->inv_src; 125 case PIPE_BLENDFACTOR_INV_DST_COLOR: 126 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 127 if(!bld->inv_dst) 128 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 129 return bld->inv_dst; 130 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 131 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 132 if(!bld->inv_const) 133 bld->inv_const = lp_build_comp(&bld->base, bld->const_); 134 return bld->inv_const; 135 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 136 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 137 /* TODO */ 138 assert(0); 139 return bld->base.zero; 140 default: 141 assert(0); 142 return bld->base.zero; 143 } 144} 145 146 147enum lp_build_blend_swizzle { 148 LP_BUILD_BLEND_SWIZZLE_RGBA = 0, 149 LP_BUILD_BLEND_SWIZZLE_AAAA = 1 150}; 151 152 153/** 154 * How should we shuffle the base factor. 
155 */ 156static enum lp_build_blend_swizzle 157lp_build_blend_factor_swizzle(unsigned factor) 158{ 159 switch (factor) { 160 case PIPE_BLENDFACTOR_ONE: 161 case PIPE_BLENDFACTOR_ZERO: 162 case PIPE_BLENDFACTOR_SRC_COLOR: 163 case PIPE_BLENDFACTOR_DST_COLOR: 164 case PIPE_BLENDFACTOR_CONST_COLOR: 165 case PIPE_BLENDFACTOR_SRC1_COLOR: 166 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 167 case PIPE_BLENDFACTOR_INV_DST_COLOR: 168 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 169 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 170 return LP_BUILD_BLEND_SWIZZLE_RGBA; 171 case PIPE_BLENDFACTOR_SRC_ALPHA: 172 case PIPE_BLENDFACTOR_DST_ALPHA: 173 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 174 case PIPE_BLENDFACTOR_SRC1_ALPHA: 175 case PIPE_BLENDFACTOR_CONST_ALPHA: 176 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 177 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 178 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 179 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 180 return LP_BUILD_BLEND_SWIZZLE_AAAA; 181 default: 182 assert(0); 183 return LP_BUILD_BLEND_SWIZZLE_RGBA; 184 } 185} 186 187 188static LLVMValueRef 189lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, 190 LLVMValueRef rgb, 191 LLVMValueRef alpha, 192 enum lp_build_blend_swizzle rgb_swizzle, 193 unsigned alpha_swizzle) 194{ 195 LLVMValueRef swizzled_rgb; 196 197 switch (rgb_swizzle) { 198 case LP_BUILD_BLEND_SWIZZLE_RGBA: 199 swizzled_rgb = rgb; 200 break; 201 case LP_BUILD_BLEND_SWIZZLE_AAAA: 202 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle); 203 break; 204 default: 205 assert(0); 206 swizzled_rgb = bld->base.undef; 207 } 208 209 if (rgb != alpha) { 210 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle, 211 alpha, swizzled_rgb); 212 } 213 214 return swizzled_rgb; 215} 216 217 218/** 219 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml 220 */ 221static LLVMValueRef 222lp_build_blend_factor(struct lp_build_blend_aos_context *bld, 223 LLVMValueRef factor1, 224 unsigned rgb_factor, 225 unsigned 
alpha_factor, 226 unsigned alpha_swizzle) 227{ 228 LLVMValueRef rgb_factor_; 229 LLVMValueRef alpha_factor_; 230 LLVMValueRef factor2; 231 enum lp_build_blend_swizzle rgb_swizzle; 232 233 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); 234 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 235 236 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); 237 238 factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); 239 240 return lp_build_mul(&bld->base, factor1, factor2); 241} 242 243 244/** 245 * Is (a OP b) == (b OP a)? 246 */ 247boolean 248lp_build_blend_func_commutative(unsigned func) 249{ 250 switch (func) { 251 case PIPE_BLEND_ADD: 252 case PIPE_BLEND_MIN: 253 case PIPE_BLEND_MAX: 254 return TRUE; 255 case PIPE_BLEND_SUBTRACT: 256 case PIPE_BLEND_REVERSE_SUBTRACT: 257 return FALSE; 258 default: 259 assert(0); 260 return TRUE; 261 } 262} 263 264 265boolean 266lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) 267{ 268 if(rgb_func == alpha_func) 269 return FALSE; 270 if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) 271 return TRUE; 272 if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) 273 return TRUE; 274 return FALSE; 275} 276 277 278/** 279 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml 280 */ 281LLVMValueRef 282lp_build_blend_func(struct lp_build_context *bld, 283 unsigned func, 284 LLVMValueRef term1, 285 LLVMValueRef term2) 286{ 287 switch (func) { 288 case PIPE_BLEND_ADD: 289 return lp_build_add(bld, term1, term2); 290 case PIPE_BLEND_SUBTRACT: 291 return lp_build_sub(bld, term1, term2); 292 case PIPE_BLEND_REVERSE_SUBTRACT: 293 return lp_build_sub(bld, term2, term1); 294 case PIPE_BLEND_MIN: 295 return lp_build_min(bld, term1, term2); 296 case PIPE_BLEND_MAX: 297 return lp_build_max(bld, term1, term2); 298 default: 299 assert(0); 300 return bld->zero; 301 } 
302} 303 304 305/** 306 * Performs blending of src and dst pixels 307 * 308 * @param blend the blend state of the shader variant 309 * @param cbuf_format format of the colour buffer 310 * @param type data type of the pixel vector 311 * @param rt rt number 312 * @param src blend src 313 * @param dst blend dst 314 * @param mask optional mask to apply to the blending result 315 * @param const_ const blend color 316 * @param swizzle swizzle values for RGBA 317 * 318 * @return the result of blending src and dst 319 */ 320LLVMValueRef 321lp_build_blend_aos(struct gallivm_state *gallivm, 322 const struct pipe_blend_state *blend, 323 const enum pipe_format *cbuf_format, 324 struct lp_type type, 325 unsigned rt, 326 LLVMValueRef src, 327 LLVMValueRef dst, 328 LLVMValueRef mask, 329 LLVMValueRef const_, 330 const unsigned char swizzle[4]) 331{ 332 struct lp_build_blend_aos_context bld; 333 LLVMValueRef src_term; 334 LLVMValueRef dst_term; 335 LLVMValueRef result; 336 unsigned alpha_swizzle = swizzle[3]; 337 boolean fullcolormask; 338 339 /* Setup build context */ 340 memset(&bld, 0, sizeof bld); 341 lp_build_context_init(&bld.base, gallivm, type); 342 bld.src = src; 343 bld.dst = dst; 344 bld.const_ = const_; 345 346 if (!blend->rt[rt].blend_enable) { 347 result = src; 348 } else { 349 350 /* TODO: There are still a few optimization opportunities here. For certain 351 * combinations it is possible to reorder the operations and therefore saving 352 * some instructions. 
*/ 353 354 src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, 355 blend->rt[rt].alpha_src_factor, alpha_swizzle); 356 dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, 357 blend->rt[rt].alpha_dst_factor, alpha_swizzle); 358 359 lp_build_name(src_term, "src_term"); 360 lp_build_name(dst_term, "dst_term"); 361 362 if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { 363 result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); 364 } 365 else { 366 /* Seperate RGB / A functions */ 367 368 LLVMValueRef rgb; 369 LLVMValueRef alpha; 370 371 rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); 372 alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); 373 374 result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); 375 } 376 } 377 378 /* Check if color mask is necessary */ 379 fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask); 380 381 if (!fullcolormask) { 382 LLVMValueRef color_mask; 383 384 color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, blend->rt[rt].colormask, swizzle); 385 lp_build_name(color_mask, "color_mask"); 386 387 /* Combine with input mask if necessary */ 388 if (mask) { 389 mask = lp_build_and(&bld.base, color_mask, mask); 390 } else { 391 mask = color_mask; 392 } 393 } 394 395 /* Apply mask, if one exists */ 396 if (mask) { 397 result = lp_build_select(&bld.base, mask, result, dst); 398 } 399 400 return result; 401} 402