vc4_nir_lower_blend.c revision c30b22c421d2139135519449a68bf3120710a552
1/*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * Implements most of the fixed function fragment pipeline in shader code.
26 *
27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28 * or color mask.  Instead, you read the current contents of the destination
29 * from the tile buffer after having waited for the scoreboard (which is
30 * handled by vc4_qpu_emit.c), then do math using your output color and that
31 * destination value, and update the output color appropriately.
32 *
33 * Once this pass is done, the color write will either have one component (for
34 * single sample) with packed argb8888, or 4 components with the per-sample
35 * argb8888 result.
36 */
37
38/**
39 * Lowers fixed-function blending to a load of the destination color and a
40 * series of ALU operations before the store of the output.
41 */
42#include "util/u_format.h"
43#include "vc4_qir.h"
44#include "compiler/nir/nir_builder.h"
45#include "vc4_context.h"
46
47static bool
48blend_depends_on_dst_color(struct vc4_compile *c)
49{
50        return (c->fs_key->blend.blend_enable ||
51                c->fs_key->blend.colormask != 0xf ||
52                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
53}
54
55/** Emits a load of the previous fragment color from the tile buffer. */
56static nir_ssa_def *
57vc4_nir_get_dst_color(nir_builder *b, int sample)
58{
59        nir_intrinsic_instr *load =
60                nir_intrinsic_instr_create(b->shader,
61                                           nir_intrinsic_load_input);
62        load->num_components = 1;
63        nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
64        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
65        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
66        nir_builder_instr_insert(b, &load->instr);
67        return &load->dest.ssa;
68}
69
70static  nir_ssa_def *
71vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
72{
73        nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
74        nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
75        nir_ssa_def *high = nir_fpow(b,
76                                     nir_fmul(b,
77                                              nir_fadd(b, srgb,
78                                                       nir_imm_float(b, 0.055)),
79                                              nir_imm_float(b, 1.0 / 1.055)),
80                                     nir_imm_float(b, 2.4));
81
82        return nir_bcsel(b, is_low, low, high);
83}
84
85static  nir_ssa_def *
86vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
87{
88        nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
89        nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
90        nir_ssa_def *high = nir_fsub(b,
91                                     nir_fmul(b,
92                                              nir_imm_float(b, 1.055),
93                                              nir_fpow(b,
94                                                       linear,
95                                                       nir_imm_float(b, 0.41666))),
96                                     nir_imm_float(b, 0.055));
97
98        return nir_bcsel(b, is_low, low, high);
99}
100
101static nir_ssa_def *
102vc4_blend_channel_f(nir_builder *b,
103                    nir_ssa_def **src,
104                    nir_ssa_def **dst,
105                    unsigned factor,
106                    int channel)
107{
108        switch(factor) {
109        case PIPE_BLENDFACTOR_ONE:
110                return nir_imm_float(b, 1.0);
111        case PIPE_BLENDFACTOR_SRC_COLOR:
112                return src[channel];
113        case PIPE_BLENDFACTOR_SRC_ALPHA:
114                return src[3];
115        case PIPE_BLENDFACTOR_DST_ALPHA:
116                return dst[3];
117        case PIPE_BLENDFACTOR_DST_COLOR:
118                return dst[channel];
119        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
120                if (channel != 3) {
121                        return nir_fmin(b,
122                                        src[3],
123                                        nir_fsub(b,
124                                                 nir_imm_float(b, 1.0),
125                                                 dst[3]));
126                } else {
127                        return nir_imm_float(b, 1.0);
128                }
129        case PIPE_BLENDFACTOR_CONST_COLOR:
130                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
131        case PIPE_BLENDFACTOR_CONST_ALPHA:
132                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
133        case PIPE_BLENDFACTOR_ZERO:
134                return nir_imm_float(b, 0.0);
135        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
136                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
137        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
138                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
139        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
140                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
141        case PIPE_BLENDFACTOR_INV_DST_COLOR:
142                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
143        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
144                return nir_fsub(b, nir_imm_float(b, 1.0),
145                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
146        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
147                return nir_fsub(b, nir_imm_float(b, 1.0),
148                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
149
150        default:
151        case PIPE_BLENDFACTOR_SRC1_COLOR:
152        case PIPE_BLENDFACTOR_SRC1_ALPHA:
153        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
154        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
155                /* Unsupported. */
156                fprintf(stderr, "Unknown blend factor %d\n", factor);
157                return nir_imm_float(b, 1.0);
158        }
159}
160
161static nir_ssa_def *
162vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
163                        int chan)
164{
165        unsigned chan_mask = 0xff << (chan * 8);
166        return nir_ior(b,
167                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
168                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
169}
170
171static nir_ssa_def *
172vc4_blend_channel_i(nir_builder *b,
173                    nir_ssa_def *src,
174                    nir_ssa_def *dst,
175                    nir_ssa_def *src_a,
176                    nir_ssa_def *dst_a,
177                    unsigned factor,
178                    int a_chan)
179{
180        switch (factor) {
181        case PIPE_BLENDFACTOR_ONE:
182                return nir_imm_int(b, ~0);
183        case PIPE_BLENDFACTOR_SRC_COLOR:
184                return src;
185        case PIPE_BLENDFACTOR_SRC_ALPHA:
186                return src_a;
187        case PIPE_BLENDFACTOR_DST_ALPHA:
188                return dst_a;
189        case PIPE_BLENDFACTOR_DST_COLOR:
190                return dst;
191        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
192                return vc4_nir_set_packed_chan(b,
193                                               nir_umin_4x8(b,
194                                                            src_a,
195                                                            nir_inot(b, dst_a)),
196                                               nir_imm_int(b, ~0),
197                                               a_chan);
198        case PIPE_BLENDFACTOR_CONST_COLOR:
199                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
200        case PIPE_BLENDFACTOR_CONST_ALPHA:
201                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
202        case PIPE_BLENDFACTOR_ZERO:
203                return nir_imm_int(b, 0);
204        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
205                return nir_inot(b, src);
206        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
207                return nir_inot(b, src_a);
208        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
209                return nir_inot(b, dst_a);
210        case PIPE_BLENDFACTOR_INV_DST_COLOR:
211                return nir_inot(b, dst);
212        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
213                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
214        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
215                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
216
217        default:
218        case PIPE_BLENDFACTOR_SRC1_COLOR:
219        case PIPE_BLENDFACTOR_SRC1_ALPHA:
220        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
221        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
222                /* Unsupported. */
223                fprintf(stderr, "Unknown blend factor %d\n", factor);
224                return nir_imm_int(b, ~0);
225        }
226}
227
228static nir_ssa_def *
229vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
230                 unsigned func)
231{
232        switch (func) {
233        case PIPE_BLEND_ADD:
234                return nir_fadd(b, src, dst);
235        case PIPE_BLEND_SUBTRACT:
236                return nir_fsub(b, src, dst);
237        case PIPE_BLEND_REVERSE_SUBTRACT:
238                return nir_fsub(b, dst, src);
239        case PIPE_BLEND_MIN:
240                return nir_fmin(b, src, dst);
241        case PIPE_BLEND_MAX:
242                return nir_fmax(b, src, dst);
243
244        default:
245                /* Unsupported. */
246                fprintf(stderr, "Unknown blend func %d\n", func);
247                return src;
248
249        }
250}
251
252static nir_ssa_def *
253vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
254                 unsigned func)
255{
256        switch (func) {
257        case PIPE_BLEND_ADD:
258                return nir_usadd_4x8(b, src, dst);
259        case PIPE_BLEND_SUBTRACT:
260                return nir_ussub_4x8(b, src, dst);
261        case PIPE_BLEND_REVERSE_SUBTRACT:
262                return nir_ussub_4x8(b, dst, src);
263        case PIPE_BLEND_MIN:
264                return nir_umin_4x8(b, src, dst);
265        case PIPE_BLEND_MAX:
266                return nir_umax_4x8(b, src, dst);
267
268        default:
269                /* Unsupported. */
270                fprintf(stderr, "Unknown blend func %d\n", func);
271                return src;
272
273        }
274}
275
276static void
277vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
278                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
279{
280        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
281
282        if (!blend->blend_enable) {
283                for (int i = 0; i < 4; i++)
284                        result[i] = src_color[i];
285                return;
286        }
287
288        /* Clamp the src color to [0, 1].  Dest is already clamped. */
289        for (int i = 0; i < 4; i++)
290                src_color[i] = nir_fsat(b, src_color[i]);
291
292        nir_ssa_def *src_blend[4], *dst_blend[4];
293        for (int i = 0; i < 4; i++) {
294                int src_factor = ((i != 3) ? blend->rgb_src_factor :
295                                  blend->alpha_src_factor);
296                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
297                                  blend->alpha_dst_factor);
298                src_blend[i] = nir_fmul(b, src_color[i],
299                                        vc4_blend_channel_f(b,
300                                                            src_color, dst_color,
301                                                            src_factor, i));
302                dst_blend[i] = nir_fmul(b, dst_color[i],
303                                        vc4_blend_channel_f(b,
304                                                            src_color, dst_color,
305                                                            dst_factor, i));
306        }
307
308        for (int i = 0; i < 4; i++) {
309                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
310                                             ((i != 3) ? blend->rgb_func :
311                                              blend->alpha_func));
312        }
313}
314
315static nir_ssa_def *
316vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
317{
318        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
319        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
320}
321
322static nir_ssa_def *
323vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
324                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
325                  nir_ssa_def *src_float_a)
326{
327        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
328
329        if (!blend->blend_enable)
330                return src_color;
331
332        enum pipe_format color_format = c->fs_key->color_format;
333        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
334        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
335        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
336        nir_ssa_def *dst_a;
337        int alpha_chan;
338        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
339                if (format_swiz[alpha_chan] == 3)
340                        break;
341        }
342        if (alpha_chan != 4) {
343                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
344                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
345                                                              shift), imm_0xff));
346        } else {
347                dst_a = nir_imm_int(b, ~0);
348        }
349
350        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
351                                                      src_color, dst_color,
352                                                      src_a, dst_a,
353                                                      blend->rgb_src_factor,
354                                                      alpha_chan);
355        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
356                                                      src_color, dst_color,
357                                                      src_a, dst_a,
358                                                      blend->rgb_dst_factor,
359                                                      alpha_chan);
360
361        if (alpha_chan != 4 &&
362            blend->alpha_src_factor != blend->rgb_src_factor) {
363                nir_ssa_def *src_alpha_factor =
364                        vc4_blend_channel_i(b,
365                                            src_color, dst_color,
366                                            src_a, dst_a,
367                                            blend->alpha_src_factor,
368                                            alpha_chan);
369                src_factor = vc4_nir_set_packed_chan(b, src_factor,
370                                                     src_alpha_factor,
371                                                     alpha_chan);
372        }
373        if (alpha_chan != 4 &&
374            blend->alpha_dst_factor != blend->rgb_dst_factor) {
375                nir_ssa_def *dst_alpha_factor =
376                        vc4_blend_channel_i(b,
377                                            src_color, dst_color,
378                                            src_a, dst_a,
379                                            blend->alpha_dst_factor,
380                                            alpha_chan);
381                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
382                                                     dst_alpha_factor,
383                                                     alpha_chan);
384        }
385        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
386        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
387
388        nir_ssa_def *result =
389                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
390        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
391                nir_ssa_def *result_a = vc4_blend_func_i(b,
392                                                         src_blend,
393                                                         dst_blend,
394                                                         blend->alpha_func);
395                result = vc4_nir_set_packed_chan(b, result, result_a,
396                                                 alpha_chan);
397        }
398        return result;
399}
400
401static nir_ssa_def *
402vc4_logicop(nir_builder *b, int logicop_func,
403            nir_ssa_def *src, nir_ssa_def *dst)
404{
405        switch (logicop_func) {
406        case PIPE_LOGICOP_CLEAR:
407                return nir_imm_int(b, 0);
408        case PIPE_LOGICOP_NOR:
409                return nir_inot(b, nir_ior(b, src, dst));
410        case PIPE_LOGICOP_AND_INVERTED:
411                return nir_iand(b, nir_inot(b, src), dst);
412        case PIPE_LOGICOP_COPY_INVERTED:
413                return nir_inot(b, src);
414        case PIPE_LOGICOP_AND_REVERSE:
415                return nir_iand(b, src, nir_inot(b, dst));
416        case PIPE_LOGICOP_INVERT:
417                return nir_inot(b, dst);
418        case PIPE_LOGICOP_XOR:
419                return nir_ixor(b, src, dst);
420        case PIPE_LOGICOP_NAND:
421                return nir_inot(b, nir_iand(b, src, dst));
422        case PIPE_LOGICOP_AND:
423                return nir_iand(b, src, dst);
424        case PIPE_LOGICOP_EQUIV:
425                return nir_inot(b, nir_ixor(b, src, dst));
426        case PIPE_LOGICOP_NOOP:
427                return dst;
428        case PIPE_LOGICOP_OR_INVERTED:
429                return nir_ior(b, nir_inot(b, src), dst);
430        case PIPE_LOGICOP_OR_REVERSE:
431                return nir_ior(b, src, nir_inot(b, dst));
432        case PIPE_LOGICOP_OR:
433                return nir_ior(b, src, dst);
434        case PIPE_LOGICOP_SET:
435                return nir_imm_int(b, ~0);
436        default:
437                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
438                /* FALLTHROUGH */
439        case PIPE_LOGICOP_COPY:
440                return src;
441        }
442}
443
444static nir_ssa_def *
445vc4_nir_pipe_compare_func(nir_builder *b, int func,
446                          nir_ssa_def *src0, nir_ssa_def *src1)
447{
448        switch (func) {
449        default:
450                fprintf(stderr, "Unknown compare func %d\n", func);
451                /* FALLTHROUGH */
452        case PIPE_FUNC_NEVER:
453                return nir_imm_int(b, 0);
454        case PIPE_FUNC_ALWAYS:
455                return nir_imm_int(b, ~0);
456        case PIPE_FUNC_EQUAL:
457                return nir_feq(b, src0, src1);
458        case PIPE_FUNC_NOTEQUAL:
459                return nir_fne(b, src0, src1);
460        case PIPE_FUNC_GREATER:
461                return nir_flt(b, src1, src0);
462        case PIPE_FUNC_GEQUAL:
463                return nir_fge(b, src0, src1);
464        case PIPE_FUNC_LESS:
465                return nir_flt(b, src0, src1);
466        case PIPE_FUNC_LEQUAL:
467                return nir_fge(b, src1, src0);
468        }
469}
470
471static void
472vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
473                                nir_ssa_def *alpha)
474{
475        if (!c->fs_key->alpha_test)
476                return;
477
478        nir_ssa_def *alpha_ref =
479                vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
480        nir_ssa_def *condition =
481                vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
482                                          alpha, alpha_ref);
483
484        nir_intrinsic_instr *discard =
485                nir_intrinsic_instr_create(b->shader,
486                                           nir_intrinsic_discard_if);
487        discard->num_components = 1;
488        discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
489        nir_builder_instr_insert(b, &discard->instr);
490}
491
492static nir_ssa_def *
493vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
494                         nir_ssa_def **colors)
495{
496        enum pipe_format color_format = c->fs_key->color_format;
497        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
498
499        nir_ssa_def *swizzled[4];
500        for (int i = 0; i < 4; i++) {
501                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
502                                                           format_swiz[i]);
503        }
504
505        return nir_pack_unorm_4x8(b,
506                                  nir_vec4(b,
507                                           swizzled[0], swizzled[1],
508                                           swizzled[2], swizzled[3]));
509
510}
511
512static nir_ssa_def *
513vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
514                       int sample)
515{
516        enum pipe_format color_format = c->fs_key->color_format;
517        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
518        bool srgb = util_format_is_srgb(color_format);
519
520        /* Pull out the float src/dst color components. */
521        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
522        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
523        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
524        for (unsigned i = 0; i < 4; i++) {
525                src_color[i] = nir_channel(b, src, i);
526                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
527        }
528
529        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
530                src_color[3] = nir_imm_float(b, 1.0);
531
532        vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
533
534        nir_ssa_def *packed_color;
535        if (srgb) {
536                /* Unswizzle the destination color. */
537                nir_ssa_def *dst_color[4];
538                for (unsigned i = 0; i < 4; i++) {
539                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
540                                                                    unpacked_dst_color,
541                                                                    format_swiz[i]);
542                }
543
544                /* Turn dst color to linear. */
545                for (int i = 0; i < 3; i++)
546                        dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
547
548                nir_ssa_def *blend_color[4];
549                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
550
551                /* sRGB encode the output color */
552                for (int i = 0; i < 3; i++)
553                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
554
555                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
556        } else {
557                nir_ssa_def *packed_src_color =
558                        vc4_nir_swizzle_and_pack(c, b, src_color);
559
560                packed_color =
561                        vc4_do_blending_i(c, b,
562                                          packed_src_color, packed_dst_color,
563                                          src_color[3]);
564        }
565
566        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
567                                   packed_color, packed_dst_color);
568
569        /* If the bit isn't set in the color mask, then just return the
570         * original dst color, instead.
571         */
572        uint32_t colormask = 0xffffffff;
573        for (int i = 0; i < 4; i++) {
574                if (format_swiz[i] < 4 &&
575                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
576                        colormask &= ~(0xff << (i * 8));
577                }
578        }
579
580        return nir_ior(b,
581                       nir_iand(b, packed_color,
582                                nir_imm_int(b, colormask)),
583                       nir_iand(b, packed_dst_color,
584                                nir_imm_int(b, ~colormask)));
585}
586
587static int
588vc4_nir_next_output_driver_location(nir_shader *s)
589{
590        int maxloc = -1;
591
592        nir_foreach_variable(var, &s->outputs)
593                maxloc = MAX2(maxloc, (int)var->data.driver_location);
594
595        return maxloc + 1;
596}
597
598static void
599vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
600                          nir_ssa_def *val)
601{
602        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
603                                                        glsl_uint_type(),
604                                                        "sample_mask");
605        sample_mask->data.driver_location =
606                vc4_nir_next_output_driver_location(c->s);
607        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
608
609        nir_intrinsic_instr *intr =
610                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
611        intr->num_components = 1;
612        nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
613
614        intr->src[0] = nir_src_for_ssa(val);
615        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
616        nir_builder_instr_insert(b, &intr->instr);
617}
618
619static void
620vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
621                          nir_intrinsic_instr *intr)
622{
623        nir_ssa_def *frag_color = intr->src[0].ssa;
624
625        if (c->fs_key->sample_coverage) {
626                nir_intrinsic_instr *load =
627                        nir_intrinsic_instr_create(b->shader,
628                                                   nir_intrinsic_load_sample_mask_in);
629                load->num_components = 1;
630                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
631                nir_builder_instr_insert(b, &load->instr);
632
633                nir_ssa_def *bitmask = &load->dest.ssa;
634
635                vc4_nir_store_sample_mask(c, b, bitmask);
636        } else if (c->fs_key->sample_alpha_to_coverage) {
637                nir_ssa_def *a = nir_channel(b, frag_color, 3);
638
639                /* XXX: We should do a nice dither based on the fragment
640                 * coordinate, instead.
641                 */
642                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
643                nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
644                nir_ssa_def *bitmask = nir_isub(b,
645                                                nir_ishl(b,
646                                                         nir_imm_int(b, 1),
647                                                         num_bits),
648                                                nir_imm_int(b, 1));
649                vc4_nir_store_sample_mask(c, b, bitmask);
650        }
651
652        /* The TLB color read returns each sample in turn, so if our blending
653         * depends on the destination color, we're going to have to run the
654         * blending function separately for each destination sample value, and
655         * then output the per-sample color using TLB_COLOR_MS.
656         */
657        nir_ssa_def *blend_output;
658        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
659                c->msaa_per_sample_output = true;
660
661                nir_ssa_def *samples[4];
662                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
663                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
664                blend_output = nir_vec4(b,
665                                        samples[0], samples[1],
666                                        samples[2], samples[3]);
667        } else {
668                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
669        }
670
671        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
672                              nir_src_for_ssa(blend_output));
673        intr->num_components = blend_output->num_components;
674}
675
676static bool
677vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
678{
679        nir_foreach_instr_safe(instr, block) {
680                if (instr->type != nir_instr_type_intrinsic)
681                        continue;
682                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
683                if (intr->intrinsic != nir_intrinsic_store_output)
684                        continue;
685
686                nir_variable *output_var = NULL;
687                nir_foreach_variable(var, &c->s->outputs) {
688                        if (var->data.driver_location ==
689                            nir_intrinsic_base(intr)) {
690                                output_var = var;
691                                break;
692                        }
693                }
694                assert(output_var);
695
696                if (output_var->data.location != FRAG_RESULT_COLOR &&
697                    output_var->data.location != FRAG_RESULT_DATA0) {
698                        continue;
699                }
700
701                nir_function_impl *impl =
702                        nir_cf_node_get_function(&block->cf_node);
703                nir_builder b;
704                nir_builder_init(&b, impl);
705                b.cursor = nir_before_instr(&intr->instr);
706                vc4_nir_lower_blend_instr(c, &b, intr);
707        }
708        return true;
709}
710
711void
712vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
713{
714        nir_foreach_function(function, s) {
715                if (function->impl) {
716                        nir_foreach_block(block, function->impl) {
717                                vc4_nir_lower_blend_block(block, c);
718                        }
719
720                        nir_metadata_preserve(function->impl,
721                                              nir_metadata_block_index |
722                                              nir_metadata_dominance);
723                }
724        }
725}
726