vc4_nir_lower_blend.c revision c30b22c421d2139135519449a68bf3120710a552
1/* 2 * Copyright © 2015 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * Implements most of the fixed function fragment pipeline in shader code. 26 * 27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops, 28 * or color mask. Instead, you read the current contents of the destination 29 * from the tile buffer after having waited for the scoreboard (which is 30 * handled by vc4_qpu_emit.c), then do math using your output color and that 31 * destination value, and update the output color appropriately. 32 * 33 * Once this pass is done, the color write will either have one component (for 34 * single sample) with packed argb8888, or 4 components with the per-sample 35 * argb8888 result. 36 */ 37 38/** 39 * Lowers fixed-function blending to a load of the destination color and a 40 * series of ALU operations before the store of the output. 41 */ 42#include "util/u_format.h" 43#include "vc4_qir.h" 44#include "compiler/nir/nir_builder.h" 45#include "vc4_context.h" 46 47static bool 48blend_depends_on_dst_color(struct vc4_compile *c) 49{ 50 return (c->fs_key->blend.blend_enable || 51 c->fs_key->blend.colormask != 0xf || 52 c->fs_key->logicop_func != PIPE_LOGICOP_COPY); 53} 54 55/** Emits a load of the previous fragment color from the tile buffer. */ 56static nir_ssa_def * 57vc4_nir_get_dst_color(nir_builder *b, int sample) 58{ 59 nir_intrinsic_instr *load = 60 nir_intrinsic_instr_create(b->shader, 61 nir_intrinsic_load_input); 62 load->num_components = 1; 63 nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample); 64 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 65 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); 66 nir_builder_instr_insert(b, &load->instr); 67 return &load->dest.ssa; 68} 69 70static nir_ssa_def * 71vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb) 72{ 73 nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045)); 74 nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92)); 75 nir_ssa_def *high = nir_fpow(b, 76 nir_fmul(b, 77 nir_fadd(b, srgb, 78 nir_imm_float(b, 0.055)), 79 nir_imm_float(b, 1.0 / 1.055)), 80 nir_imm_float(b, 2.4)); 81 82 return nir_bcsel(b, is_low, low, high); 83} 84 85static nir_ssa_def * 86vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) 87{ 88 nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308)); 89 nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92)); 90 nir_ssa_def *high = nir_fsub(b, 91 nir_fmul(b, 92 nir_imm_float(b, 1.055), 93 nir_fpow(b, 94 linear, 95 nir_imm_float(b, 0.41666))), 96 nir_imm_float(b, 0.055)); 97 98 return nir_bcsel(b, is_low, low, high); 99} 100 101static nir_ssa_def * 102vc4_blend_channel_f(nir_builder *b, 103 nir_ssa_def **src, 104 nir_ssa_def **dst, 105 unsigned factor, 106 int channel) 107{ 108 switch(factor) { 109 case PIPE_BLENDFACTOR_ONE: 110 return nir_imm_float(b, 1.0); 111 case PIPE_BLENDFACTOR_SRC_COLOR: 112 return src[channel]; 113 case PIPE_BLENDFACTOR_SRC_ALPHA: 114 return src[3]; 115 case PIPE_BLENDFACTOR_DST_ALPHA: 116 return dst[3]; 117 case PIPE_BLENDFACTOR_DST_COLOR: 118 return dst[channel]; 119 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 120 if (channel != 3) { 121 return nir_fmin(b, 122 src[3], 123 nir_fsub(b, 124 nir_imm_float(b, 1.0), 125 dst[3])); 126 } else { 127 return nir_imm_float(b, 1.0); 128 } 129 case PIPE_BLENDFACTOR_CONST_COLOR: 130 return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel); 131 case PIPE_BLENDFACTOR_CONST_ALPHA: 132 return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W); 133 case PIPE_BLENDFACTOR_ZERO: 134 return nir_imm_float(b, 0.0); 135 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 136 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]); 137 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 138 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]); 139 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 140 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]); 141 case PIPE_BLENDFACTOR_INV_DST_COLOR: 142 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]); 143 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 144 return nir_fsub(b, nir_imm_float(b, 1.0), 145 vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel)); 146 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 147 return nir_fsub(b, nir_imm_float(b, 1.0), 148 vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W)); 149 150 default: 151 case PIPE_BLENDFACTOR_SRC1_COLOR: 152 case PIPE_BLENDFACTOR_SRC1_ALPHA: 153 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 154 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 155 /* Unsupported. */ 156 fprintf(stderr, "Unknown blend factor %d\n", factor); 157 return nir_imm_float(b, 1.0); 158 } 159} 160 161static nir_ssa_def * 162vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, 163 int chan) 164{ 165 unsigned chan_mask = 0xff << (chan * 8); 166 return nir_ior(b, 167 nir_iand(b, src0, nir_imm_int(b, ~chan_mask)), 168 nir_iand(b, src1, nir_imm_int(b, chan_mask))); 169} 170 171static nir_ssa_def * 172vc4_blend_channel_i(nir_builder *b, 173 nir_ssa_def *src, 174 nir_ssa_def *dst, 175 nir_ssa_def *src_a, 176 nir_ssa_def *dst_a, 177 unsigned factor, 178 int a_chan) 179{ 180 switch (factor) { 181 case PIPE_BLENDFACTOR_ONE: 182 return nir_imm_int(b, ~0); 183 case PIPE_BLENDFACTOR_SRC_COLOR: 184 return src; 185 case PIPE_BLENDFACTOR_SRC_ALPHA: 186 return src_a; 187 case PIPE_BLENDFACTOR_DST_ALPHA: 188 return dst_a; 189 case PIPE_BLENDFACTOR_DST_COLOR: 190 return dst; 191 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 192 return vc4_nir_set_packed_chan(b, 193 nir_umin_4x8(b, 194 src_a, 195 nir_inot(b, dst_a)), 196 nir_imm_int(b, ~0), 197 a_chan); 198 case PIPE_BLENDFACTOR_CONST_COLOR: 199 return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA); 200 case PIPE_BLENDFACTOR_CONST_ALPHA: 201 return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA); 202 case PIPE_BLENDFACTOR_ZERO: 203 return nir_imm_int(b, 0); 204 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 205 return nir_inot(b, src); 206 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 207 return nir_inot(b, src_a); 208 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 209 return nir_inot(b, dst_a); 210 case PIPE_BLENDFACTOR_INV_DST_COLOR: 211 return nir_inot(b, dst); 212 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 213 return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA)); 214 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 215 return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA)); 216 217 default: 218 case PIPE_BLENDFACTOR_SRC1_COLOR: 219 case PIPE_BLENDFACTOR_SRC1_ALPHA: 220 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 221 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 222 /* Unsupported. */ 223 fprintf(stderr, "Unknown blend factor %d\n", factor); 224 return nir_imm_int(b, ~0); 225 } 226} 227 228static nir_ssa_def * 229vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, 230 unsigned func) 231{ 232 switch (func) { 233 case PIPE_BLEND_ADD: 234 return nir_fadd(b, src, dst); 235 case PIPE_BLEND_SUBTRACT: 236 return nir_fsub(b, src, dst); 237 case PIPE_BLEND_REVERSE_SUBTRACT: 238 return nir_fsub(b, dst, src); 239 case PIPE_BLEND_MIN: 240 return nir_fmin(b, src, dst); 241 case PIPE_BLEND_MAX: 242 return nir_fmax(b, src, dst); 243 244 default: 245 /* Unsupported. */ 246 fprintf(stderr, "Unknown blend func %d\n", func); 247 return src; 248 249 } 250} 251 252static nir_ssa_def * 253vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, 254 unsigned func) 255{ 256 switch (func) { 257 case PIPE_BLEND_ADD: 258 return nir_usadd_4x8(b, src, dst); 259 case PIPE_BLEND_SUBTRACT: 260 return nir_ussub_4x8(b, src, dst); 261 case PIPE_BLEND_REVERSE_SUBTRACT: 262 return nir_ussub_4x8(b, dst, src); 263 case PIPE_BLEND_MIN: 264 return nir_umin_4x8(b, src, dst); 265 case PIPE_BLEND_MAX: 266 return nir_umax_4x8(b, src, dst); 267 268 default: 269 /* Unsupported. */ 270 fprintf(stderr, "Unknown blend func %d\n", func); 271 return src; 272 273 } 274} 275 276static void 277vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, 278 nir_ssa_def **src_color, nir_ssa_def **dst_color) 279{ 280 struct pipe_rt_blend_state *blend = &c->fs_key->blend; 281 282 if (!blend->blend_enable) { 283 for (int i = 0; i < 4; i++) 284 result[i] = src_color[i]; 285 return; 286 } 287 288 /* Clamp the src color to [0, 1]. Dest is already clamped. */ 289 for (int i = 0; i < 4; i++) 290 src_color[i] = nir_fsat(b, src_color[i]); 291 292 nir_ssa_def *src_blend[4], *dst_blend[4]; 293 for (int i = 0; i < 4; i++) { 294 int src_factor = ((i != 3) ? blend->rgb_src_factor : 295 blend->alpha_src_factor); 296 int dst_factor = ((i != 3) ? blend->rgb_dst_factor : 297 blend->alpha_dst_factor); 298 src_blend[i] = nir_fmul(b, src_color[i], 299 vc4_blend_channel_f(b, 300 src_color, dst_color, 301 src_factor, i)); 302 dst_blend[i] = nir_fmul(b, dst_color[i], 303 vc4_blend_channel_f(b, 304 src_color, dst_color, 305 dst_factor, i)); 306 } 307 308 for (int i = 0; i < 4; i++) { 309 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i], 310 ((i != 3) ? blend->rgb_func : 311 blend->alpha_func)); 312 } 313} 314 315static nir_ssa_def * 316vc4_nir_splat(nir_builder *b, nir_ssa_def *src) 317{ 318 nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8))); 319 return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16))); 320} 321 322static nir_ssa_def * 323vc4_do_blending_i(struct vc4_compile *c, nir_builder *b, 324 nir_ssa_def *src_color, nir_ssa_def *dst_color, 325 nir_ssa_def *src_float_a) 326{ 327 struct pipe_rt_blend_state *blend = &c->fs_key->blend; 328 329 if (!blend->blend_enable) 330 return src_color; 331 332 enum pipe_format color_format = c->fs_key->color_format; 333 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 334 nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff); 335 nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a); 336 nir_ssa_def *dst_a; 337 int alpha_chan; 338 for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) { 339 if (format_swiz[alpha_chan] == 3) 340 break; 341 } 342 if (alpha_chan != 4) { 343 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8); 344 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color, 345 shift), imm_0xff)); 346 } else { 347 dst_a = nir_imm_int(b, ~0); 348 } 349 350 nir_ssa_def *src_factor = vc4_blend_channel_i(b, 351 src_color, dst_color, 352 src_a, dst_a, 353 blend->rgb_src_factor, 354 alpha_chan); 355 nir_ssa_def *dst_factor = vc4_blend_channel_i(b, 356 src_color, dst_color, 357 src_a, dst_a, 358 blend->rgb_dst_factor, 359 alpha_chan); 360 361 if (alpha_chan != 4 && 362 blend->alpha_src_factor != blend->rgb_src_factor) { 363 nir_ssa_def *src_alpha_factor = 364 vc4_blend_channel_i(b, 365 src_color, dst_color, 366 src_a, dst_a, 367 blend->alpha_src_factor, 368 alpha_chan); 369 src_factor = vc4_nir_set_packed_chan(b, src_factor, 370 src_alpha_factor, 371 alpha_chan); 372 } 373 if (alpha_chan != 4 && 374 blend->alpha_dst_factor != blend->rgb_dst_factor) { 375 nir_ssa_def *dst_alpha_factor = 376 vc4_blend_channel_i(b, 377 src_color, dst_color, 378 src_a, dst_a, 379 blend->alpha_dst_factor, 380 alpha_chan); 381 dst_factor = vc4_nir_set_packed_chan(b, dst_factor, 382 dst_alpha_factor, 383 alpha_chan); 384 } 385 nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor); 386 nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor); 387 388 nir_ssa_def *result = 389 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func); 390 if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) { 391 nir_ssa_def *result_a = vc4_blend_func_i(b, 392 src_blend, 393 dst_blend, 394 blend->alpha_func); 395 result = vc4_nir_set_packed_chan(b, result, result_a, 396 alpha_chan); 397 } 398 return result; 399} 400 401static nir_ssa_def * 402vc4_logicop(nir_builder *b, int logicop_func, 403 nir_ssa_def *src, nir_ssa_def *dst) 404{ 405 switch (logicop_func) { 406 case PIPE_LOGICOP_CLEAR: 407 return nir_imm_int(b, 0); 408 case PIPE_LOGICOP_NOR: 409 return nir_inot(b, nir_ior(b, src, dst)); 410 case PIPE_LOGICOP_AND_INVERTED: 411 return nir_iand(b, nir_inot(b, src), dst); 412 case PIPE_LOGICOP_COPY_INVERTED: 413 return nir_inot(b, src); 414 case PIPE_LOGICOP_AND_REVERSE: 415 return nir_iand(b, src, nir_inot(b, dst)); 416 case PIPE_LOGICOP_INVERT: 417 return nir_inot(b, dst); 418 case PIPE_LOGICOP_XOR: 419 return nir_ixor(b, src, dst); 420 case PIPE_LOGICOP_NAND: 421 return nir_inot(b, nir_iand(b, src, dst)); 422 case PIPE_LOGICOP_AND: 423 return nir_iand(b, src, dst); 424 case PIPE_LOGICOP_EQUIV: 425 return nir_inot(b, nir_ixor(b, src, dst)); 426 case PIPE_LOGICOP_NOOP: 427 return dst; 428 case PIPE_LOGICOP_OR_INVERTED: 429 return nir_ior(b, nir_inot(b, src), dst); 430 case PIPE_LOGICOP_OR_REVERSE: 431 return nir_ior(b, src, nir_inot(b, dst)); 432 case PIPE_LOGICOP_OR: 433 return nir_ior(b, src, dst); 434 case PIPE_LOGICOP_SET: 435 return nir_imm_int(b, ~0); 436 default: 437 fprintf(stderr, "Unknown logic op %d\n", logicop_func); 438 /* FALLTHROUGH */ 439 case PIPE_LOGICOP_COPY: 440 return src; 441 } 442} 443 444static nir_ssa_def * 445vc4_nir_pipe_compare_func(nir_builder *b, int func, 446 nir_ssa_def *src0, nir_ssa_def *src1) 447{ 448 switch (func) { 449 default: 450 fprintf(stderr, "Unknown compare func %d\n", func); 451 /* FALLTHROUGH */ 452 case PIPE_FUNC_NEVER: 453 return nir_imm_int(b, 0); 454 case PIPE_FUNC_ALWAYS: 455 return nir_imm_int(b, ~0); 456 case PIPE_FUNC_EQUAL: 457 return nir_feq(b, src0, src1); 458 case PIPE_FUNC_NOTEQUAL: 459 return nir_fne(b, src0, src1); 460 case PIPE_FUNC_GREATER: 461 return nir_flt(b, src1, src0); 462 case PIPE_FUNC_GEQUAL: 463 return nir_fge(b, src0, src1); 464 case PIPE_FUNC_LESS: 465 return nir_flt(b, src0, src1); 466 case PIPE_FUNC_LEQUAL: 467 return nir_fge(b, src1, src0); 468 } 469} 470 471static void 472vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b, 473 nir_ssa_def *alpha) 474{ 475 if (!c->fs_key->alpha_test) 476 return; 477 478 nir_ssa_def *alpha_ref = 479 vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF); 480 nir_ssa_def *condition = 481 vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func, 482 alpha, alpha_ref); 483 484 nir_intrinsic_instr *discard = 485 nir_intrinsic_instr_create(b->shader, 486 nir_intrinsic_discard_if); 487 discard->num_components = 1; 488 discard->src[0] = nir_src_for_ssa(nir_inot(b, condition)); 489 nir_builder_instr_insert(b, &discard->instr); 490} 491 492static nir_ssa_def * 493vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b, 494 nir_ssa_def **colors) 495{ 496 enum pipe_format color_format = c->fs_key->color_format; 497 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 498 499 nir_ssa_def *swizzled[4]; 500 for (int i = 0; i < 4; i++) { 501 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors, 502 format_swiz[i]); 503 } 504 505 return nir_pack_unorm_4x8(b, 506 nir_vec4(b, 507 swizzled[0], swizzled[1], 508 swizzled[2], swizzled[3])); 509 510} 511 512static nir_ssa_def * 513vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src, 514 int sample) 515{ 516 enum pipe_format color_format = c->fs_key->color_format; 517 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 518 bool srgb = util_format_is_srgb(color_format); 519 520 /* Pull out the float src/dst color components. */ 521 nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample); 522 nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color); 523 nir_ssa_def *src_color[4], *unpacked_dst_color[4]; 524 for (unsigned i = 0; i < 4; i++) { 525 src_color[i] = nir_channel(b, src, i); 526 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i); 527 } 528 529 if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa) 530 src_color[3] = nir_imm_float(b, 1.0); 531 532 vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); 533 534 nir_ssa_def *packed_color; 535 if (srgb) { 536 /* Unswizzle the destination color. */ 537 nir_ssa_def *dst_color[4]; 538 for (unsigned i = 0; i < 4; i++) { 539 dst_color[i] = vc4_nir_get_swizzled_channel(b, 540 unpacked_dst_color, 541 format_swiz[i]); 542 } 543 544 /* Turn dst color to linear. */ 545 for (int i = 0; i < 3; i++) 546 dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); 547 548 nir_ssa_def *blend_color[4]; 549 vc4_do_blending_f(c, b, blend_color, src_color, dst_color); 550 551 /* sRGB encode the output color */ 552 for (int i = 0; i < 3; i++) 553 blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); 554 555 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); 556 } else { 557 nir_ssa_def *packed_src_color = 558 vc4_nir_swizzle_and_pack(c, b, src_color); 559 560 packed_color = 561 vc4_do_blending_i(c, b, 562 packed_src_color, packed_dst_color, 563 src_color[3]); 564 } 565 566 packed_color = vc4_logicop(b, c->fs_key->logicop_func, 567 packed_color, packed_dst_color); 568 569 /* If the bit isn't set in the color mask, then just return the 570 * original dst color, instead. 571 */ 572 uint32_t colormask = 0xffffffff; 573 for (int i = 0; i < 4; i++) { 574 if (format_swiz[i] < 4 && 575 !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) { 576 colormask &= ~(0xff << (i * 8)); 577 } 578 } 579 580 return nir_ior(b, 581 nir_iand(b, packed_color, 582 nir_imm_int(b, colormask)), 583 nir_iand(b, packed_dst_color, 584 nir_imm_int(b, ~colormask))); 585} 586 587static int 588vc4_nir_next_output_driver_location(nir_shader *s) 589{ 590 int maxloc = -1; 591 592 nir_foreach_variable(var, &s->outputs) 593 maxloc = MAX2(maxloc, (int)var->data.driver_location); 594 595 return maxloc + 1; 596} 597 598static void 599vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b, 600 nir_ssa_def *val) 601{ 602 nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out, 603 glsl_uint_type(), 604 "sample_mask"); 605 sample_mask->data.driver_location = 606 vc4_nir_next_output_driver_location(c->s); 607 sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK; 608 609 nir_intrinsic_instr *intr = 610 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output); 611 intr->num_components = 1; 612 nir_intrinsic_set_base(intr, sample_mask->data.driver_location); 613 614 intr->src[0] = nir_src_for_ssa(val); 615 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); 616 nir_builder_instr_insert(b, &intr->instr); 617} 618 619static void 620vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, 621 nir_intrinsic_instr *intr) 622{ 623 nir_ssa_def *frag_color = intr->src[0].ssa; 624 625 if (c->fs_key->sample_coverage) { 626 nir_intrinsic_instr *load = 627 nir_intrinsic_instr_create(b->shader, 628 nir_intrinsic_load_sample_mask_in); 629 load->num_components = 1; 630 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); 631 nir_builder_instr_insert(b, &load->instr); 632 633 nir_ssa_def *bitmask = &load->dest.ssa; 634 635 vc4_nir_store_sample_mask(c, b, bitmask); 636 } else if (c->fs_key->sample_alpha_to_coverage) { 637 nir_ssa_def *a = nir_channel(b, frag_color, 3); 638 639 /* XXX: We should do a nice dither based on the fragment 640 * coordinate, instead. 641 */ 642 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES); 643 nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples)); 644 nir_ssa_def *bitmask = nir_isub(b, 645 nir_ishl(b, 646 nir_imm_int(b, 1), 647 num_bits), 648 nir_imm_int(b, 1)); 649 vc4_nir_store_sample_mask(c, b, bitmask); 650 } 651 652 /* The TLB color read returns each sample in turn, so if our blending 653 * depends on the destination color, we're going to have to run the 654 * blending function separately for each destination sample value, and 655 * then output the per-sample color using TLB_COLOR_MS. 656 */ 657 nir_ssa_def *blend_output; 658 if (c->fs_key->msaa && blend_depends_on_dst_color(c)) { 659 c->msaa_per_sample_output = true; 660 661 nir_ssa_def *samples[4]; 662 for (int i = 0; i < VC4_MAX_SAMPLES; i++) 663 samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i); 664 blend_output = nir_vec4(b, 665 samples[0], samples[1], 666 samples[2], samples[3]); 667 } else { 668 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0); 669 } 670 671 nir_instr_rewrite_src(&intr->instr, &intr->src[0], 672 nir_src_for_ssa(blend_output)); 673 intr->num_components = blend_output->num_components; 674} 675 676static bool 677vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c) 678{ 679 nir_foreach_instr_safe(instr, block) { 680 if (instr->type != nir_instr_type_intrinsic) 681 continue; 682 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 683 if (intr->intrinsic != nir_intrinsic_store_output) 684 continue; 685 686 nir_variable *output_var = NULL; 687 nir_foreach_variable(var, &c->s->outputs) { 688 if (var->data.driver_location == 689 nir_intrinsic_base(intr)) { 690 output_var = var; 691 break; 692 } 693 } 694 assert(output_var); 695 696 if (output_var->data.location != FRAG_RESULT_COLOR && 697 output_var->data.location != FRAG_RESULT_DATA0) { 698 continue; 699 } 700 701 nir_function_impl *impl = 702 nir_cf_node_get_function(&block->cf_node); 703 nir_builder b; 704 nir_builder_init(&b, impl); 705 b.cursor = nir_before_instr(&intr->instr); 706 vc4_nir_lower_blend_instr(c, &b, intr); 707 } 708 return true; 709} 710 711void 712vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c) 713{ 714 nir_foreach_function(function, s) { 715 if (function->impl) { 716 nir_foreach_block(block, function->impl) { 717 vc4_nir_lower_blend_block(block, c); 718 } 719 720 nir_metadata_preserve(function->impl, 721 nir_metadata_block_index | 722 nir_metadata_dominance); 723 } 724 } 725} 726