1/* 2 * Copyright © 2015 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * Implements most of the fixed function fragment pipeline in shader code. 26 * 27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops, 28 * or color mask. Instead, you read the current contents of the destination 29 * from the tile buffer after having waited for the scoreboard (which is 30 * handled by vc4_qpu_emit.c), then do math using your output color and that 31 * destination value, and update the output color appropriately. 32 * 33 * Once this pass is done, the color write will either have one component (for 34 * single sample) with packed argb8888, or 4 components with the per-sample 35 * argb8888 result. 36 */ 37 38/** 39 * Lowers fixed-function blending to a load of the destination color and a 40 * series of ALU operations before the store of the output. 41 */ 42#include "util/u_format.h" 43#include "vc4_qir.h" 44#include "compiler/nir/nir_builder.h" 45#include "vc4_context.h" 46 47static bool 48blend_depends_on_dst_color(struct vc4_compile *c) 49{ 50 return (c->fs_key->blend.blend_enable || 51 c->fs_key->blend.colormask != 0xf || 52 c->fs_key->logicop_func != PIPE_LOGICOP_COPY); 53} 54 55/** Emits a load of the previous fragment color from the tile buffer. */ 56static nir_ssa_def * 57vc4_nir_get_dst_color(nir_builder *b, int sample) 58{ 59 nir_intrinsic_instr *load = 60 nir_intrinsic_instr_create(b->shader, 61 nir_intrinsic_load_input); 62 load->num_components = 1; 63 nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample); 64 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 65 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); 66 nir_builder_instr_insert(b, &load->instr); 67 return &load->dest.ssa; 68} 69 70static nir_ssa_def * 71vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb) 72{ 73 nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045)); 74 nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92)); 75 nir_ssa_def *high = nir_fpow(b, 76 nir_fmul(b, 77 nir_fadd(b, srgb, 78 nir_imm_float(b, 0.055)), 79 nir_imm_float(b, 1.0 / 1.055)), 80 nir_imm_float(b, 2.4)); 81 82 return nir_bcsel(b, is_low, low, high); 83} 84 85static nir_ssa_def * 86vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) 87{ 88 nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308)); 89 nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92)); 90 nir_ssa_def *high = nir_fsub(b, 91 nir_fmul(b, 92 nir_imm_float(b, 1.055), 93 nir_fpow(b, 94 linear, 95 nir_imm_float(b, 0.41666))), 96 nir_imm_float(b, 0.055)); 97 98 return nir_bcsel(b, is_low, low, high); 99} 100 101static nir_ssa_def * 102vc4_blend_channel_f(nir_builder *b, 103 nir_ssa_def **src, 104 nir_ssa_def **dst, 105 unsigned factor, 106 int channel) 107{ 108 switch(factor) { 109 case PIPE_BLENDFACTOR_ONE: 110 return nir_imm_float(b, 1.0); 111 case PIPE_BLENDFACTOR_SRC_COLOR: 112 return src[channel]; 113 case PIPE_BLENDFACTOR_SRC_ALPHA: 114 return src[3]; 115 case PIPE_BLENDFACTOR_DST_ALPHA: 116 return dst[3]; 117 case PIPE_BLENDFACTOR_DST_COLOR: 118 return dst[channel]; 119 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 120 if (channel != 3) { 121 return nir_fmin(b, 122 src[3], 123 nir_fsub(b, 124 nir_imm_float(b, 1.0), 125 dst[3])); 126 } else { 127 return nir_imm_float(b, 1.0); 128 } 129 case PIPE_BLENDFACTOR_CONST_COLOR: 130 return nir_load_system_value(b, 131 nir_intrinsic_load_blend_const_color_r_float + 132 channel, 133 0); 134 case PIPE_BLENDFACTOR_CONST_ALPHA: 135 return nir_load_blend_const_color_a_float(b); 136 case PIPE_BLENDFACTOR_ZERO: 137 return nir_imm_float(b, 0.0); 138 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 139 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]); 140 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 141 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]); 142 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 143 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]); 144 case PIPE_BLENDFACTOR_INV_DST_COLOR: 145 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]); 146 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 147 return nir_fsub(b, nir_imm_float(b, 1.0), 148 nir_load_system_value(b, 149 nir_intrinsic_load_blend_const_color_r_float + 150 channel, 151 0)); 152 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 153 return nir_fsub(b, nir_imm_float(b, 1.0), 154 nir_load_blend_const_color_a_float(b)); 155 156 default: 157 case PIPE_BLENDFACTOR_SRC1_COLOR: 158 case PIPE_BLENDFACTOR_SRC1_ALPHA: 159 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 160 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 161 /* Unsupported. */ 162 fprintf(stderr, "Unknown blend factor %d\n", factor); 163 return nir_imm_float(b, 1.0); 164 } 165} 166 167static nir_ssa_def * 168vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, 169 int chan) 170{ 171 unsigned chan_mask = 0xff << (chan * 8); 172 return nir_ior(b, 173 nir_iand(b, src0, nir_imm_int(b, ~chan_mask)), 174 nir_iand(b, src1, nir_imm_int(b, chan_mask))); 175} 176 177static nir_ssa_def * 178vc4_blend_channel_i(nir_builder *b, 179 nir_ssa_def *src, 180 nir_ssa_def *dst, 181 nir_ssa_def *src_a, 182 nir_ssa_def *dst_a, 183 unsigned factor, 184 int a_chan) 185{ 186 switch (factor) { 187 case PIPE_BLENDFACTOR_ONE: 188 return nir_imm_int(b, ~0); 189 case PIPE_BLENDFACTOR_SRC_COLOR: 190 return src; 191 case PIPE_BLENDFACTOR_SRC_ALPHA: 192 return src_a; 193 case PIPE_BLENDFACTOR_DST_ALPHA: 194 return dst_a; 195 case PIPE_BLENDFACTOR_DST_COLOR: 196 return dst; 197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 198 return vc4_nir_set_packed_chan(b, 199 nir_umin_4x8(b, 200 src_a, 201 nir_inot(b, dst_a)), 202 nir_imm_int(b, ~0), 203 a_chan); 204 case PIPE_BLENDFACTOR_CONST_COLOR: 205 return nir_load_blend_const_color_rgba8888_unorm(b); 206 case PIPE_BLENDFACTOR_CONST_ALPHA: 207 return nir_load_blend_const_color_aaaa8888_unorm(b); 208 case PIPE_BLENDFACTOR_ZERO: 209 return nir_imm_int(b, 0); 210 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 211 return nir_inot(b, src); 212 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 213 return nir_inot(b, src_a); 214 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 215 return nir_inot(b, dst_a); 216 case PIPE_BLENDFACTOR_INV_DST_COLOR: 217 return nir_inot(b, dst); 218 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 219 return nir_inot(b, 220 nir_load_blend_const_color_rgba8888_unorm(b)); 221 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 222 return nir_inot(b, 223 nir_load_blend_const_color_aaaa8888_unorm(b)); 224 225 default: 226 case PIPE_BLENDFACTOR_SRC1_COLOR: 227 case PIPE_BLENDFACTOR_SRC1_ALPHA: 228 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 229 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 230 /* Unsupported. */ 231 fprintf(stderr, "Unknown blend factor %d\n", factor); 232 return nir_imm_int(b, ~0); 233 } 234} 235 236static nir_ssa_def * 237vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, 238 unsigned func) 239{ 240 switch (func) { 241 case PIPE_BLEND_ADD: 242 return nir_fadd(b, src, dst); 243 case PIPE_BLEND_SUBTRACT: 244 return nir_fsub(b, src, dst); 245 case PIPE_BLEND_REVERSE_SUBTRACT: 246 return nir_fsub(b, dst, src); 247 case PIPE_BLEND_MIN: 248 return nir_fmin(b, src, dst); 249 case PIPE_BLEND_MAX: 250 return nir_fmax(b, src, dst); 251 252 default: 253 /* Unsupported. */ 254 fprintf(stderr, "Unknown blend func %d\n", func); 255 return src; 256 257 } 258} 259 260static nir_ssa_def * 261vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, 262 unsigned func) 263{ 264 switch (func) { 265 case PIPE_BLEND_ADD: 266 return nir_usadd_4x8(b, src, dst); 267 case PIPE_BLEND_SUBTRACT: 268 return nir_ussub_4x8(b, src, dst); 269 case PIPE_BLEND_REVERSE_SUBTRACT: 270 return nir_ussub_4x8(b, dst, src); 271 case PIPE_BLEND_MIN: 272 return nir_umin_4x8(b, src, dst); 273 case PIPE_BLEND_MAX: 274 return nir_umax_4x8(b, src, dst); 275 276 default: 277 /* Unsupported. */ 278 fprintf(stderr, "Unknown blend func %d\n", func); 279 return src; 280 281 } 282} 283 284static void 285vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, 286 nir_ssa_def **src_color, nir_ssa_def **dst_color) 287{ 288 struct pipe_rt_blend_state *blend = &c->fs_key->blend; 289 290 if (!blend->blend_enable) { 291 for (int i = 0; i < 4; i++) 292 result[i] = src_color[i]; 293 return; 294 } 295 296 /* Clamp the src color to [0, 1]. Dest is already clamped. */ 297 for (int i = 0; i < 4; i++) 298 src_color[i] = nir_fsat(b, src_color[i]); 299 300 nir_ssa_def *src_blend[4], *dst_blend[4]; 301 for (int i = 0; i < 4; i++) { 302 int src_factor = ((i != 3) ? blend->rgb_src_factor : 303 blend->alpha_src_factor); 304 int dst_factor = ((i != 3) ? blend->rgb_dst_factor : 305 blend->alpha_dst_factor); 306 src_blend[i] = nir_fmul(b, src_color[i], 307 vc4_blend_channel_f(b, 308 src_color, dst_color, 309 src_factor, i)); 310 dst_blend[i] = nir_fmul(b, dst_color[i], 311 vc4_blend_channel_f(b, 312 src_color, dst_color, 313 dst_factor, i)); 314 } 315 316 for (int i = 0; i < 4; i++) { 317 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i], 318 ((i != 3) ? blend->rgb_func : 319 blend->alpha_func)); 320 } 321} 322 323static nir_ssa_def * 324vc4_nir_splat(nir_builder *b, nir_ssa_def *src) 325{ 326 nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8))); 327 return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16))); 328} 329 330static nir_ssa_def * 331vc4_do_blending_i(struct vc4_compile *c, nir_builder *b, 332 nir_ssa_def *src_color, nir_ssa_def *dst_color, 333 nir_ssa_def *src_float_a) 334{ 335 struct pipe_rt_blend_state *blend = &c->fs_key->blend; 336 337 if (!blend->blend_enable) 338 return src_color; 339 340 enum pipe_format color_format = c->fs_key->color_format; 341 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 342 nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff); 343 nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a); 344 nir_ssa_def *dst_a; 345 int alpha_chan; 346 for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) { 347 if (format_swiz[alpha_chan] == 3) 348 break; 349 } 350 if (alpha_chan != 4) { 351 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8); 352 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color, 353 shift), imm_0xff)); 354 } else { 355 dst_a = nir_imm_int(b, ~0); 356 } 357 358 nir_ssa_def *src_factor = vc4_blend_channel_i(b, 359 src_color, dst_color, 360 src_a, dst_a, 361 blend->rgb_src_factor, 362 alpha_chan); 363 nir_ssa_def *dst_factor = vc4_blend_channel_i(b, 364 src_color, dst_color, 365 src_a, dst_a, 366 blend->rgb_dst_factor, 367 alpha_chan); 368 369 if (alpha_chan != 4 && 370 blend->alpha_src_factor != blend->rgb_src_factor) { 371 nir_ssa_def *src_alpha_factor = 372 vc4_blend_channel_i(b, 373 src_color, dst_color, 374 src_a, dst_a, 375 blend->alpha_src_factor, 376 alpha_chan); 377 src_factor = vc4_nir_set_packed_chan(b, src_factor, 378 src_alpha_factor, 379 alpha_chan); 380 } 381 if (alpha_chan != 4 && 382 blend->alpha_dst_factor != blend->rgb_dst_factor) { 383 nir_ssa_def *dst_alpha_factor = 384 vc4_blend_channel_i(b, 385 src_color, dst_color, 386 src_a, dst_a, 387 blend->alpha_dst_factor, 388 alpha_chan); 389 dst_factor = vc4_nir_set_packed_chan(b, dst_factor, 390 dst_alpha_factor, 391 alpha_chan); 392 } 393 nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor); 394 nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor); 395 396 nir_ssa_def *result = 397 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func); 398 if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) { 399 nir_ssa_def *result_a = vc4_blend_func_i(b, 400 src_blend, 401 dst_blend, 402 blend->alpha_func); 403 result = vc4_nir_set_packed_chan(b, result, result_a, 404 alpha_chan); 405 } 406 return result; 407} 408 409static nir_ssa_def * 410vc4_logicop(nir_builder *b, int logicop_func, 411 nir_ssa_def *src, nir_ssa_def *dst) 412{ 413 switch (logicop_func) { 414 case PIPE_LOGICOP_CLEAR: 415 return nir_imm_int(b, 0); 416 case PIPE_LOGICOP_NOR: 417 return nir_inot(b, nir_ior(b, src, dst)); 418 case PIPE_LOGICOP_AND_INVERTED: 419 return nir_iand(b, nir_inot(b, src), dst); 420 case PIPE_LOGICOP_COPY_INVERTED: 421 return nir_inot(b, src); 422 case PIPE_LOGICOP_AND_REVERSE: 423 return nir_iand(b, src, nir_inot(b, dst)); 424 case PIPE_LOGICOP_INVERT: 425 return nir_inot(b, dst); 426 case PIPE_LOGICOP_XOR: 427 return nir_ixor(b, src, dst); 428 case PIPE_LOGICOP_NAND: 429 return nir_inot(b, nir_iand(b, src, dst)); 430 case PIPE_LOGICOP_AND: 431 return nir_iand(b, src, dst); 432 case PIPE_LOGICOP_EQUIV: 433 return nir_inot(b, nir_ixor(b, src, dst)); 434 case PIPE_LOGICOP_NOOP: 435 return dst; 436 case PIPE_LOGICOP_OR_INVERTED: 437 return nir_ior(b, nir_inot(b, src), dst); 438 case PIPE_LOGICOP_OR_REVERSE: 439 return nir_ior(b, src, nir_inot(b, dst)); 440 case PIPE_LOGICOP_OR: 441 return nir_ior(b, src, dst); 442 case PIPE_LOGICOP_SET: 443 return nir_imm_int(b, ~0); 444 default: 445 fprintf(stderr, "Unknown logic op %d\n", logicop_func); 446 /* FALLTHROUGH */ 447 case PIPE_LOGICOP_COPY: 448 return src; 449 } 450} 451 452static nir_ssa_def * 453vc4_nir_pipe_compare_func(nir_builder *b, int func, 454 nir_ssa_def *src0, nir_ssa_def *src1) 455{ 456 switch (func) { 457 default: 458 fprintf(stderr, "Unknown compare func %d\n", func); 459 /* FALLTHROUGH */ 460 case PIPE_FUNC_NEVER: 461 return nir_imm_int(b, 0); 462 case PIPE_FUNC_ALWAYS: 463 return nir_imm_int(b, ~0); 464 case PIPE_FUNC_EQUAL: 465 return nir_feq(b, src0, src1); 466 case PIPE_FUNC_NOTEQUAL: 467 return nir_fne(b, src0, src1); 468 case PIPE_FUNC_GREATER: 469 return nir_flt(b, src1, src0); 470 case PIPE_FUNC_GEQUAL: 471 return nir_fge(b, src0, src1); 472 case PIPE_FUNC_LESS: 473 return nir_flt(b, src0, src1); 474 case PIPE_FUNC_LEQUAL: 475 return nir_fge(b, src1, src0); 476 } 477} 478 479static void 480vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b, 481 nir_ssa_def *alpha) 482{ 483 if (!c->fs_key->alpha_test) 484 return; 485 486 nir_ssa_def *condition = 487 vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func, 488 alpha, 489 nir_load_alpha_ref_float(b)); 490 491 nir_intrinsic_instr *discard = 492 nir_intrinsic_instr_create(b->shader, 493 nir_intrinsic_discard_if); 494 discard->num_components = 1; 495 discard->src[0] = nir_src_for_ssa(nir_inot(b, condition)); 496 nir_builder_instr_insert(b, &discard->instr); 497 c->s->info->fs.uses_discard = true; 498} 499 500static nir_ssa_def * 501vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b, 502 nir_ssa_def **colors) 503{ 504 enum pipe_format color_format = c->fs_key->color_format; 505 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 506 507 nir_ssa_def *swizzled[4]; 508 for (int i = 0; i < 4; i++) { 509 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors, 510 format_swiz[i]); 511 } 512 513 return nir_pack_unorm_4x8(b, 514 nir_vec4(b, 515 swizzled[0], swizzled[1], 516 swizzled[2], swizzled[3])); 517 518} 519 520static nir_ssa_def * 521vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src, 522 int sample) 523{ 524 enum pipe_format color_format = c->fs_key->color_format; 525 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 526 bool srgb = util_format_is_srgb(color_format); 527 528 /* Pull out the float src/dst color components. */ 529 nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample); 530 nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color); 531 nir_ssa_def *src_color[4], *unpacked_dst_color[4]; 532 for (unsigned i = 0; i < 4; i++) { 533 src_color[i] = nir_channel(b, src, i); 534 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i); 535 } 536 537 if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa) 538 src_color[3] = nir_imm_float(b, 1.0); 539 540 vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); 541 542 nir_ssa_def *packed_color; 543 if (srgb) { 544 /* Unswizzle the destination color. */ 545 nir_ssa_def *dst_color[4]; 546 for (unsigned i = 0; i < 4; i++) { 547 dst_color[i] = vc4_nir_get_swizzled_channel(b, 548 unpacked_dst_color, 549 format_swiz[i]); 550 } 551 552 /* Turn dst color to linear. */ 553 for (int i = 0; i < 3; i++) 554 dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); 555 556 nir_ssa_def *blend_color[4]; 557 vc4_do_blending_f(c, b, blend_color, src_color, dst_color); 558 559 /* sRGB encode the output color */ 560 for (int i = 0; i < 3; i++) 561 blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); 562 563 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); 564 } else { 565 nir_ssa_def *packed_src_color = 566 vc4_nir_swizzle_and_pack(c, b, src_color); 567 568 packed_color = 569 vc4_do_blending_i(c, b, 570 packed_src_color, packed_dst_color, 571 src_color[3]); 572 } 573 574 packed_color = vc4_logicop(b, c->fs_key->logicop_func, 575 packed_color, packed_dst_color); 576 577 /* If the bit isn't set in the color mask, then just return the 578 * original dst color, instead. 579 */ 580 uint32_t colormask = 0xffffffff; 581 for (int i = 0; i < 4; i++) { 582 if (format_swiz[i] < 4 && 583 !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) { 584 colormask &= ~(0xff << (i * 8)); 585 } 586 } 587 588 return nir_ior(b, 589 nir_iand(b, packed_color, 590 nir_imm_int(b, colormask)), 591 nir_iand(b, packed_dst_color, 592 nir_imm_int(b, ~colormask))); 593} 594 595static int 596vc4_nir_next_output_driver_location(nir_shader *s) 597{ 598 int maxloc = -1; 599 600 nir_foreach_variable(var, &s->outputs) 601 maxloc = MAX2(maxloc, (int)var->data.driver_location); 602 603 return maxloc + 1; 604} 605 606static void 607vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b, 608 nir_ssa_def *val) 609{ 610 nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out, 611 glsl_uint_type(), 612 "sample_mask"); 613 sample_mask->data.driver_location = 614 vc4_nir_next_output_driver_location(c->s); 615 sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK; 616 617 nir_intrinsic_instr *intr = 618 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output); 619 intr->num_components = 1; 620 nir_intrinsic_set_base(intr, sample_mask->data.driver_location); 621 622 intr->src[0] = nir_src_for_ssa(val); 623 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); 624 nir_builder_instr_insert(b, &intr->instr); 625} 626 627static void 628vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, 629 nir_intrinsic_instr *intr) 630{ 631 nir_ssa_def *frag_color = intr->src[0].ssa; 632 633 if (c->fs_key->sample_coverage) { 634 nir_intrinsic_instr *load = 635 nir_intrinsic_instr_create(b->shader, 636 nir_intrinsic_load_sample_mask_in); 637 load->num_components = 1; 638 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); 639 nir_builder_instr_insert(b, &load->instr); 640 641 nir_ssa_def *bitmask = &load->dest.ssa; 642 643 vc4_nir_store_sample_mask(c, b, bitmask); 644 } else if (c->fs_key->sample_alpha_to_coverage) { 645 nir_ssa_def *a = nir_channel(b, frag_color, 3); 646 647 /* XXX: We should do a nice dither based on the fragment 648 * coordinate, instead. 649 */ 650 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES); 651 nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples)); 652 nir_ssa_def *bitmask = nir_isub(b, 653 nir_ishl(b, 654 nir_imm_int(b, 1), 655 num_bits), 656 nir_imm_int(b, 1)); 657 vc4_nir_store_sample_mask(c, b, bitmask); 658 } 659 660 /* The TLB color read returns each sample in turn, so if our blending 661 * depends on the destination color, we're going to have to run the 662 * blending function separately for each destination sample value, and 663 * then output the per-sample color using TLB_COLOR_MS. 664 */ 665 nir_ssa_def *blend_output; 666 if (c->fs_key->msaa && blend_depends_on_dst_color(c)) { 667 c->msaa_per_sample_output = true; 668 669 nir_ssa_def *samples[4]; 670 for (int i = 0; i < VC4_MAX_SAMPLES; i++) 671 samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i); 672 blend_output = nir_vec4(b, 673 samples[0], samples[1], 674 samples[2], samples[3]); 675 } else { 676 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0); 677 } 678 679 nir_instr_rewrite_src(&intr->instr, &intr->src[0], 680 nir_src_for_ssa(blend_output)); 681 intr->num_components = blend_output->num_components; 682} 683 684static bool 685vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c) 686{ 687 nir_foreach_instr_safe(instr, block) { 688 if (instr->type != nir_instr_type_intrinsic) 689 continue; 690 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 691 if (intr->intrinsic != nir_intrinsic_store_output) 692 continue; 693 694 nir_variable *output_var = NULL; 695 nir_foreach_variable(var, &c->s->outputs) { 696 if (var->data.driver_location == 697 nir_intrinsic_base(intr)) { 698 output_var = var; 699 break; 700 } 701 } 702 assert(output_var); 703 704 if (output_var->data.location != FRAG_RESULT_COLOR && 705 output_var->data.location != FRAG_RESULT_DATA0) { 706 continue; 707 } 708 709 nir_function_impl *impl = 710 nir_cf_node_get_function(&block->cf_node); 711 nir_builder b; 712 nir_builder_init(&b, impl); 713 b.cursor = nir_before_instr(&intr->instr); 714 vc4_nir_lower_blend_instr(c, &b, intr); 715 } 716 return true; 717} 718 719void 720vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c) 721{ 722 nir_foreach_function(function, s) { 723 if (function->impl) { 724 nir_foreach_block(block, function->impl) { 725 vc4_nir_lower_blend_block(block, c); 726 } 727 728 nir_metadata_preserve(function->impl, 729 nir_metadata_block_index | 730 nir_metadata_dominance); 731 } 732 } 733} 734