/* lp_bld_blend_aos.c revision 0b0f4628d6fb8276a9f1c336a785a838b602bca8 */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Blend LLVM IR generation -- AoS layout. 32 * 33 * AoS blending is in general much slower than SoA, but there are some cases 34 * where it might be faster. In particular, if a pixel is rendered only once 35 * then the overhead of tiling and untiling will dominate over the speedup that 36 * SoA gives. So we might want to detect such cases and fallback to AoS in the 37 * future, but for now this function is here for historical/benchmarking 38 * purposes. 39 * 40 * Run lp_blend_test after any change to this file. 
41 * 42 * @author Jose Fonseca <jfonseca@vmware.com> 43 */ 44 45 46#include "pipe/p_state.h" 47#include "util/u_debug.h" 48#include "util/u_format.h" 49 50#include "gallivm/lp_bld_type.h" 51#include "gallivm/lp_bld_const.h" 52#include "gallivm/lp_bld_arit.h" 53#include "gallivm/lp_bld_logic.h" 54#include "gallivm/lp_bld_swizzle.h" 55#include "gallivm/lp_bld_debug.h" 56 57#include "lp_bld_blend.h" 58 59 60/** 61 * We may the same values several times, so we keep them here to avoid 62 * recomputing them. Also reusing the values allows us to do simplifications 63 * that LLVM optimization passes wouldn't normally be able to do. 64 */ 65struct lp_build_blend_aos_context 66{ 67 struct lp_build_context base; 68 69 LLVMValueRef src; 70 LLVMValueRef dst; 71 LLVMValueRef const_; 72 73 LLVMValueRef inv_src; 74 LLVMValueRef inv_dst; 75 LLVMValueRef inv_const; 76 LLVMValueRef saturate; 77 78 LLVMValueRef rgb_src_factor; 79 LLVMValueRef alpha_src_factor; 80 LLVMValueRef rgb_dst_factor; 81 LLVMValueRef alpha_dst_factor; 82}; 83 84 85static LLVMValueRef 86lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, 87 unsigned factor, 88 boolean alpha) 89{ 90 switch (factor) { 91 case PIPE_BLENDFACTOR_ZERO: 92 return bld->base.zero; 93 case PIPE_BLENDFACTOR_ONE: 94 return bld->base.one; 95 case PIPE_BLENDFACTOR_SRC_COLOR: 96 case PIPE_BLENDFACTOR_SRC_ALPHA: 97 return bld->src; 98 case PIPE_BLENDFACTOR_DST_COLOR: 99 case PIPE_BLENDFACTOR_DST_ALPHA: 100 return bld->dst; 101 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 102 if(alpha) 103 return bld->base.one; 104 else { 105 if(!bld->inv_dst) 106 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 107 if(!bld->saturate) 108 bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); 109 return bld->saturate; 110 } 111 case PIPE_BLENDFACTOR_CONST_COLOR: 112 case PIPE_BLENDFACTOR_CONST_ALPHA: 113 return bld->const_; 114 case PIPE_BLENDFACTOR_SRC1_COLOR: 115 case PIPE_BLENDFACTOR_SRC1_ALPHA: 116 /* TODO */ 117 assert(0); 
118 return bld->base.zero; 119 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 120 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 121 if(!bld->inv_src) 122 bld->inv_src = lp_build_comp(&bld->base, bld->src); 123 return bld->inv_src; 124 case PIPE_BLENDFACTOR_INV_DST_COLOR: 125 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 126 if(!bld->inv_dst) 127 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 128 return bld->inv_dst; 129 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 130 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 131 if(!bld->inv_const) 132 bld->inv_const = lp_build_comp(&bld->base, bld->const_); 133 return bld->inv_const; 134 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 135 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 136 /* TODO */ 137 assert(0); 138 return bld->base.zero; 139 default: 140 assert(0); 141 return bld->base.zero; 142 } 143} 144 145 146enum lp_build_blend_swizzle { 147 LP_BUILD_BLEND_SWIZZLE_RGBA = 0, 148 LP_BUILD_BLEND_SWIZZLE_AAAA = 1 149}; 150 151 152/** 153 * How should we shuffle the base factor. 154 */ 155static enum lp_build_blend_swizzle 156lp_build_blend_factor_swizzle(unsigned factor) 157{ 158 switch (factor) { 159 case PIPE_BLENDFACTOR_ONE: 160 case PIPE_BLENDFACTOR_ZERO: 161 case PIPE_BLENDFACTOR_SRC_COLOR: 162 case PIPE_BLENDFACTOR_DST_COLOR: 163 case PIPE_BLENDFACTOR_CONST_COLOR: 164 case PIPE_BLENDFACTOR_SRC1_COLOR: 165 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 166 case PIPE_BLENDFACTOR_INV_DST_COLOR: 167 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 168 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 169 return LP_BUILD_BLEND_SWIZZLE_RGBA; 170 case PIPE_BLENDFACTOR_SRC_ALPHA: 171 case PIPE_BLENDFACTOR_DST_ALPHA: 172 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 173 case PIPE_BLENDFACTOR_SRC1_ALPHA: 174 case PIPE_BLENDFACTOR_CONST_ALPHA: 175 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 176 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 177 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 178 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 179 return LP_BUILD_BLEND_SWIZZLE_AAAA; 180 default: 181 assert(0); 182 return LP_BUILD_BLEND_SWIZZLE_RGBA; 
183 } 184} 185 186 187static LLVMValueRef 188lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, 189 LLVMValueRef rgb, 190 LLVMValueRef alpha, 191 enum lp_build_blend_swizzle rgb_swizzle, 192 unsigned alpha_swizzle) 193{ 194 LLVMValueRef swizzled_rgb; 195 196 switch (rgb_swizzle) { 197 case LP_BUILD_BLEND_SWIZZLE_RGBA: 198 swizzled_rgb = rgb; 199 break; 200 case LP_BUILD_BLEND_SWIZZLE_AAAA: 201 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle); 202 break; 203 default: 204 assert(0); 205 swizzled_rgb = bld->base.undef; 206 } 207 208 if (rgb != alpha) { 209 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle, 210 alpha, swizzled_rgb); 211 } 212 213 return swizzled_rgb; 214} 215 216 217/** 218 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml 219 */ 220static LLVMValueRef 221lp_build_blend_factor(struct lp_build_blend_aos_context *bld, 222 LLVMValueRef factor1, 223 unsigned rgb_factor, 224 unsigned alpha_factor, 225 unsigned alpha_swizzle) 226{ 227 LLVMValueRef rgb_factor_; 228 LLVMValueRef alpha_factor_; 229 LLVMValueRef factor2; 230 enum lp_build_blend_swizzle rgb_swizzle; 231 232 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); 233 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 234 235 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); 236 237 factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); 238 239 return lp_build_mul(&bld->base, factor1, factor2); 240} 241 242 243/** 244 * Is (a OP b) == (b OP a)? 
245 */ 246boolean 247lp_build_blend_func_commutative(unsigned func) 248{ 249 switch (func) { 250 case PIPE_BLEND_ADD: 251 case PIPE_BLEND_MIN: 252 case PIPE_BLEND_MAX: 253 return TRUE; 254 case PIPE_BLEND_SUBTRACT: 255 case PIPE_BLEND_REVERSE_SUBTRACT: 256 return FALSE; 257 default: 258 assert(0); 259 return TRUE; 260 } 261} 262 263 264boolean 265lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) 266{ 267 if(rgb_func == alpha_func) 268 return FALSE; 269 if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) 270 return TRUE; 271 if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) 272 return TRUE; 273 return FALSE; 274} 275 276 277/** 278 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml 279 */ 280LLVMValueRef 281lp_build_blend_func(struct lp_build_context *bld, 282 unsigned func, 283 LLVMValueRef term1, 284 LLVMValueRef term2) 285{ 286 switch (func) { 287 case PIPE_BLEND_ADD: 288 return lp_build_add(bld, term1, term2); 289 case PIPE_BLEND_SUBTRACT: 290 return lp_build_sub(bld, term1, term2); 291 case PIPE_BLEND_REVERSE_SUBTRACT: 292 return lp_build_sub(bld, term2, term1); 293 case PIPE_BLEND_MIN: 294 return lp_build_min(bld, term1, term2); 295 case PIPE_BLEND_MAX: 296 return lp_build_max(bld, term1, term2); 297 default: 298 assert(0); 299 return bld->zero; 300 } 301} 302 303 304LLVMValueRef 305lp_build_blend_aos(struct gallivm_state *gallivm, 306 const struct pipe_blend_state *blend, 307 const enum pipe_format *cbuf_format, 308 struct lp_type type, 309 unsigned rt, 310 LLVMValueRef src, 311 LLVMValueRef dst, 312 LLVMValueRef const_, 313 const unsigned char swizzle[4]) 314{ 315 struct lp_build_blend_aos_context bld; 316 LLVMValueRef src_term; 317 LLVMValueRef dst_term; 318 LLVMValueRef result; 319 unsigned alpha_swizzle = swizzle[3]; 320 boolean fullcolormask; 321 322 /* Setup build context */ 323 memset(&bld, 0, sizeof bld); 324 lp_build_context_init(&bld.base, gallivm, 
type); 325 bld.src = src; 326 bld.dst = dst; 327 bld.const_ = const_; 328 329 if (!blend->rt[rt].blend_enable) { 330 result = src; 331 } else { 332 333 /* TODO: There are still a few optimization opportunities here. For certain 334 * combinations it is possible to reorder the operations and therefore saving 335 * some instructions. */ 336 337 src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, 338 blend->rt[rt].alpha_src_factor, alpha_swizzle); 339 dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, 340 blend->rt[rt].alpha_dst_factor, alpha_swizzle); 341 342 lp_build_name(src_term, "src_term"); 343 lp_build_name(dst_term, "dst_term"); 344 345 if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { 346 result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); 347 } 348 else { 349 /* Seperate RGB / A functions */ 350 351 LLVMValueRef rgb; 352 LLVMValueRef alpha; 353 354 rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); 355 alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); 356 357 result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); 358 } 359 } 360 361 /* Apply color masking if necessary */ 362 fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask); 363 364 if (!fullcolormask) { 365 LLVMValueRef mask; 366 unsigned mask_swizzle; 367 368 /* Swizzle the color mask to ensure it matches target format */ 369 mask_swizzle = 370 ((blend->rt[rt].colormask & (1 << swizzle[0])) >> swizzle[0]) 371 | (((blend->rt[rt].colormask & (1 << swizzle[1])) >> swizzle[1]) << 1) 372 | (((blend->rt[rt].colormask & (1 << swizzle[2])) >> swizzle[2]) << 2) 373 | (((blend->rt[rt].colormask & (1 << swizzle[3])) >> swizzle[3]) << 3); 374 375 mask = lp_build_const_mask_aos(gallivm, bld.base.type, mask_swizzle); 376 377 result = lp_build_select(&bld.base, mask, 
result, dst); 378 } 379 380 return result; 381} 382