lp_bld_blend_aos.c revision f4b3430a36ea88707f59ce147a140a1a559378c5
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Blend LLVM IR generation -- AoS layout. 32 * 33 * AoS blending is in general much slower than SoA, but there are some cases 34 * where it might be faster. In particular, if a pixel is rendered only once 35 * then the overhead of tiling and untiling will dominate over the speedup that 36 * SoA gives. So we might want to detect such cases and fallback to AoS in the 37 * future, but for now this function is here for historical/benchmarking 38 * purposes. 39 * 40 * Run lp_blend_test after any change to this file. 41 * 42 * @author Jose Fonseca <jfonseca@vmware.com> 43 */ 44 45 46#include "pipe/p_state.h" 47#include "util/u_debug.h" 48#include "util/u_format.h" 49 50#include "gallivm/lp_bld_type.h" 51#include "gallivm/lp_bld_const.h" 52#include "gallivm/lp_bld_arit.h" 53#include "gallivm/lp_bld_logic.h" 54#include "gallivm/lp_bld_swizzle.h" 55#include "gallivm/lp_bld_bitarit.h" 56#include "gallivm/lp_bld_debug.h" 57 58#include "lp_bld_blend.h" 59 60 61/** 62 * We may the same values several times, so we keep them here to avoid 63 * recomputing them. Also reusing the values allows us to do simplifications 64 * that LLVM optimization passes wouldn't normally be able to do. 65 */ 66struct lp_build_blend_aos_context 67{ 68 struct lp_build_context base; 69 70 LLVMValueRef src; 71 LLVMValueRef src_alpha; 72 LLVMValueRef src1; 73 LLVMValueRef src1_alpha; 74 LLVMValueRef dst; 75 LLVMValueRef const_; 76 LLVMValueRef const_alpha; 77 78 LLVMValueRef inv_src; 79 LLVMValueRef inv_src_alpha; 80 LLVMValueRef inv_dst; 81 LLVMValueRef inv_const; 82 LLVMValueRef inv_const_alpha; 83 LLVMValueRef saturate; 84 85 LLVMValueRef rgb_src_factor; 86 LLVMValueRef alpha_src_factor; 87 LLVMValueRef rgb_dst_factor; 88 LLVMValueRef alpha_dst_factor; 89}; 90 91 92static LLVMValueRef 93lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, 94 unsigned factor, 95 boolean alpha) 96{ 97 LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src; 98 LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1; 99 LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_; 100 101 switch (factor) { 102 case PIPE_BLENDFACTOR_ZERO: 103 return bld->base.zero; 104 case PIPE_BLENDFACTOR_ONE: 105 return bld->base.one; 106 case PIPE_BLENDFACTOR_SRC_COLOR: 107 return bld->src; 108 case PIPE_BLENDFACTOR_SRC_ALPHA: 109 return src_alpha; 110 case PIPE_BLENDFACTOR_DST_COLOR: 111 case PIPE_BLENDFACTOR_DST_ALPHA: 112 return bld->dst; 113 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 114 if(alpha) 115 return bld->base.one; 116 else { 117 /* 118 * if there's separate src_alpha there's no dst alpha hence the complement 119 * is zero but for unclamped float inputs min can be non-zero (negative). 120 */ 121 if (bld->src_alpha) { 122 if (!bld->saturate) 123 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero); 124 } 125 else { 126 if(!bld->inv_dst) 127 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 128 if(!bld->saturate) 129 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst); 130 } 131 return bld->saturate; 132 } 133 case PIPE_BLENDFACTOR_CONST_COLOR: 134 return bld->const_; 135 case PIPE_BLENDFACTOR_CONST_ALPHA: 136 return const_alpha; 137 case PIPE_BLENDFACTOR_SRC1_COLOR: 138 return bld->src1; 139 case PIPE_BLENDFACTOR_SRC1_ALPHA: 140 return src1_alpha; 141 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 142 if(!bld->inv_src) 143 bld->inv_src = lp_build_comp(&bld->base, bld->src); 144 return bld->inv_src; 145 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 146 if(!bld->inv_src_alpha) 147 bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha); 148 return bld->inv_src_alpha; 149 case PIPE_BLENDFACTOR_INV_DST_COLOR: 150 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 151 if(!bld->inv_dst) 152 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 153 return bld->inv_dst; 154 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 155 if(!bld->inv_const) 156 bld->inv_const = lp_build_comp(&bld->base, bld->const_); 157 return bld->inv_const; 158 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 159 if(!bld->inv_const_alpha) 160 bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha); 161 return bld->inv_const_alpha; 162 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 163 return lp_build_comp(&bld->base, bld->src1); 164 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 165 return lp_build_comp(&bld->base, src1_alpha); 166 default: 167 assert(0); 168 return bld->base.zero; 169 } 170} 171 172 173enum lp_build_blend_swizzle { 174 LP_BUILD_BLEND_SWIZZLE_RGBA = 0, 175 LP_BUILD_BLEND_SWIZZLE_AAAA = 1 176}; 177 178 179/** 180 * How should we shuffle the base factor. 181 */ 182static enum lp_build_blend_swizzle 183lp_build_blend_factor_swizzle(unsigned factor) 184{ 185 switch (factor) { 186 case PIPE_BLENDFACTOR_ONE: 187 case PIPE_BLENDFACTOR_ZERO: 188 case PIPE_BLENDFACTOR_SRC_COLOR: 189 case PIPE_BLENDFACTOR_DST_COLOR: 190 case PIPE_BLENDFACTOR_CONST_COLOR: 191 case PIPE_BLENDFACTOR_SRC1_COLOR: 192 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 193 case PIPE_BLENDFACTOR_INV_DST_COLOR: 194 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 195 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 196 return LP_BUILD_BLEND_SWIZZLE_RGBA; 197 case PIPE_BLENDFACTOR_SRC_ALPHA: 198 case PIPE_BLENDFACTOR_DST_ALPHA: 199 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 200 case PIPE_BLENDFACTOR_SRC1_ALPHA: 201 case PIPE_BLENDFACTOR_CONST_ALPHA: 202 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 203 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 204 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 205 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 206 return LP_BUILD_BLEND_SWIZZLE_AAAA; 207 default: 208 assert(0); 209 return LP_BUILD_BLEND_SWIZZLE_RGBA; 210 } 211} 212 213 214static LLVMValueRef 215lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, 216 LLVMValueRef rgb, 217 LLVMValueRef alpha, 218 enum lp_build_blend_swizzle rgb_swizzle, 219 unsigned alpha_swizzle, 220 unsigned num_channels) 221{ 222 LLVMValueRef swizzled_rgb; 223 224 switch (rgb_swizzle) { 225 case LP_BUILD_BLEND_SWIZZLE_RGBA: 226 swizzled_rgb = rgb; 227 break; 228 case LP_BUILD_BLEND_SWIZZLE_AAAA: 229 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels); 230 break; 231 default: 232 assert(0); 233 swizzled_rgb = bld->base.undef; 234 } 235 236 if (rgb != alpha) { 237 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle, 238 alpha, swizzled_rgb, 239 num_channels); 240 } 241 242 return swizzled_rgb; 243} 244 245/** 246 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml 247 */ 248static LLVMValueRef 249lp_build_blend_factor(struct lp_build_blend_aos_context *bld, 250 unsigned rgb_factor, 251 unsigned alpha_factor, 252 unsigned alpha_swizzle, 253 unsigned num_channels) 254{ 255 LLVMValueRef rgb_factor_, alpha_factor_; 256 enum lp_build_blend_swizzle rgb_swizzle; 257 258 if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) { 259 return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 260 } 261 262 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); 263 264 if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { 265 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); 266 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 267 return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels); 268 } else { 269 return rgb_factor_; 270 } 271} 272 273 274/** 275 * Performs blending of src and dst pixels 276 * 277 * @param blend the blend state of the shader variant 278 * @param cbuf_format format of the colour buffer 279 * @param type data type of the pixel vector 280 * @param rt render target index 281 * @param src blend src 282 * @param src_alpha blend src alpha (if not included in src) 283 * @param src1 second blend src (for dual source blend) 284 * @param src1_alpha second blend src alpha (if not included in src1) 285 * @param dst blend dst 286 * @param mask optional mask to apply to the blending result 287 * @param const_ const blend color 288 * @param const_alpha const blend color alpha (if not included in const_) 289 * @param swizzle swizzle values for RGBA 290 * 291 * @return the result of blending src and dst 292 */ 293LLVMValueRef 294lp_build_blend_aos(struct gallivm_state *gallivm, 295 const struct pipe_blend_state *blend, 296 enum pipe_format cbuf_format, 297 struct lp_type type, 298 unsigned rt, 299 LLVMValueRef src, 300 LLVMValueRef src_alpha, 301 LLVMValueRef src1, 302 LLVMValueRef src1_alpha, 303 LLVMValueRef dst, 304 LLVMValueRef mask, 305 LLVMValueRef const_, 306 LLVMValueRef const_alpha, 307 const unsigned char swizzle[4], 308 int nr_channels) 309{ 310 const struct pipe_rt_blend_state * state = &blend->rt[rt]; 311 const struct util_format_description * desc; 312 struct lp_build_blend_aos_context bld; 313 LLVMValueRef src_factor, dst_factor; 314 LLVMValueRef result; 315 unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE; 316 unsigned i; 317 318 desc = util_format_description(cbuf_format); 319 320 /* Setup build context */ 321 memset(&bld, 0, sizeof bld); 322 lp_build_context_init(&bld.base, gallivm, type); 323 bld.src = src; 324 bld.src1 = src1; 325 bld.dst = dst; 326 bld.const_ = const_; 327 bld.src_alpha = src_alpha; 328 bld.src1_alpha = src1_alpha; 329 bld.const_alpha = const_alpha; 330 331 /* Find the alpha channel if not provided seperately */ 332 if (!src_alpha) { 333 for (i = 0; i < 4; ++i) { 334 if (swizzle[i] == 3) { 335 alpha_swizzle = i; 336 } 337 } 338 } 339 340 if (blend->logicop_enable) { 341 if(!type.floating) { 342 result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst); 343 } 344 else { 345 result = src; 346 } 347 } else if (!state->blend_enable) { 348 result = src; 349 } else { 350 boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1; 351 352 src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor, 353 state->alpha_src_factor, 354 alpha_swizzle, 355 nr_channels); 356 357 dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor, 358 state->alpha_dst_factor, 359 alpha_swizzle, 360 nr_channels); 361 362 result = lp_build_blend(&bld.base, 363 state->rgb_func, 364 state->rgb_src_factor, 365 state->rgb_dst_factor, 366 src, 367 dst, 368 src_factor, 369 dst_factor, 370 rgb_alpha_same, 371 false); 372 373 if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { 374 LLVMValueRef alpha; 375 376 alpha = lp_build_blend(&bld.base, 377 state->alpha_func, 378 state->alpha_src_factor, 379 state->alpha_dst_factor, 380 src, 381 dst, 382 src_factor, 383 dst_factor, 384 rgb_alpha_same, 385 false); 386 387 result = lp_build_blend_swizzle(&bld, 388 result, 389 alpha, 390 LP_BUILD_BLEND_SWIZZLE_RGBA, 391 alpha_swizzle, 392 nr_channels); 393 } 394 } 395 396 /* Check if color mask is necessary */ 397 if (!util_format_colormask_full(desc, state->colormask)) { 398 LLVMValueRef color_mask; 399 400 color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle); 401 lp_build_name(color_mask, "color_mask"); 402 403 /* Combine with input mask if necessary */ 404 if (mask) { 405 /* We can be blending floating values but masks are always integer... */ 406 unsigned floating = bld.base.type.floating; 407 bld.base.type.floating = 0; 408 409 mask = lp_build_and(&bld.base, color_mask, mask); 410 411 bld.base.type.floating = floating; 412 } else { 413 mask = color_mask; 414 } 415 } 416 417 /* Apply mask, if one exists */ 418 if (mask) { 419 result = lp_build_select(&bld.base, mask, result, dst); 420 } 421 422 return result; 423} 424