/* vc4_qir.h revision 89b1b33f44bc6ce71109ac8668529c30b6d6d910 */
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef VC4_QIR_H
#define VC4_QIR_H

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/macros.h"
#include "glsl/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"

#include "vc4_screen.h"
#include "pipe/p_state.h"

/* Only referenced through pointers in this header, so a forward declaration
 * is enough.
 */
struct nir_builder;

/**
 * Register file selector stored in struct qreg::file.
 */
enum qfile {
        QFILE_NULL,
        /* Temporaries; the index is used to look up vc4_compile::defs. */
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        /* Destination used by qir_VPM_WRITE(). */
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};

58struct qreg {
59        enum qfile file;
60        uint32_t index;
61        int pack;
62};
63
/**
 * QIR opcodes.
 *
 * Several helpers in this header compute an opcode as "first opcode of a
 * run + channel" (qir_PACK_8_F(), qir_UNPACK_8_F(), qir_UNPACK_8_I(),
 * qir_UNPACK_16_F(), qir_UNPACK_16_I()), so the PACK_8[A-D]_F,
 * UNPACK_8[A-D]_* and UNPACK_16[A-B]_* runs must stay consecutive and in
 * order.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        /* Note: Orderings of these compares must be the same as in
         * qpu_defines.h.  Each selects src[0] if its flag condition holds,
         * otherwise 0.
         *
         * NOTE(review): the ZS/ZC/NS/NC suffixes presumably mean Z or N
         * flag set/clear, matching the condition codes in qpu_defines.h --
         * confirm.
         */
        QOP_SEL_X_0_ZS,
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
        /* Each selects src[0] if its flag condition holds, otherwise
         * src[1].
         */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_PACK_8888_F,
        /* Keep A-D consecutive: qir_PACK_8_F() adds the channel to 8A. */
        QOP_PACK_8A_F,
        QOP_PACK_8B_F,
        QOP_PACK_8C_F,
        QOP_PACK_8D_F,
        QOP_TLB_DISCARD_SETUP,
        QOP_TLB_STENCIL_SETUP,
        QOP_TLB_Z_WRITE,
        QOP_TLB_COLOR_WRITE,
        QOP_TLB_COLOR_READ,
        QOP_VARY_ADD_C,

        QOP_FRAG_X,
        QOP_FRAG_Y,
        QOP_FRAG_Z,
        QOP_FRAG_W,
        QOP_FRAG_REV_FLAG,

        /* Keep each run consecutive: the qir_UNPACK_*() helpers add the
         * channel index to the first opcode of the run.
         */
        QOP_UNPACK_8A_F,
        QOP_UNPACK_8B_F,
        QOP_UNPACK_8C_F,
        QOP_UNPACK_8D_F,
        QOP_UNPACK_16A_F,
        QOP_UNPACK_16B_F,

        QOP_UNPACK_8A_I,
        QOP_UNPACK_8B_I,
        QOP_UNPACK_8C_I,
        QOP_UNPACK_8D_I,
        QOP_UNPACK_16A_I,
        QOP_UNPACK_16B_I,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,
};

165struct queued_qpu_inst {
166        struct list_head link;
167        uint64_t inst;
168};
169
170struct qinst {
171        struct list_head link;
172
173        enum qop op;
174        struct qreg dst;
175        struct qreg *src;
176        bool sf;
177};
178
/**
 * Shader stage being compiled; stored in vc4_compile::stage.
 */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};

/**
 * Kind of value a uniform slot carries; passed to qir_uniform() together
 * with a 32-bit data word and stored in vc4_compile::uniform_contents.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR_X,
        QUNIFORM_BLEND_CONST_COLOR_Y,
        QUNIFORM_BLEND_CONST_COLOR_Z,
        QUNIFORM_BLEND_CONST_COLOR_W,

        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
};

/**
 * Identifies one varying by semantic, index and swizzle; used for the
 * input_semantics and output_semantics arrays in struct vc4_compile.
 */
struct vc4_varying_semantic {
        uint8_t semantic;
        uint8_t index;
        uint8_t swizzle;
};

/**
 * One contiguous range of uniform data; an array of these lives in
 * vc4_compile::ubo_ranges.
 */
struct vc4_compiler_ubo_range {
        /**
         * offset in bytes from the start of the ubo where this range is
         * uploaded.
         *
         * Only set once used is set.
         */
        uint32_t dst_offset;

        /**
         * offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** size in bytes of this ubo range */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};

288struct vc4_key {
289        struct vc4_uncompiled_shader *shader_state;
290        struct {
291                enum pipe_format format;
292                unsigned compare_mode:1;
293                unsigned compare_func:3;
294                unsigned wrap_s:3;
295                unsigned wrap_t:3;
296                uint8_t swizzle[4];
297        } tex[VC4_MAX_TEXTURE_SAMPLERS];
298        uint8_t ucp_enables;
299};
300
301struct vc4_fs_key {
302        struct vc4_key base;
303        enum pipe_format color_format;
304        bool depth_enabled;
305        bool stencil_enabled;
306        bool stencil_twoside;
307        bool stencil_full_writemasks;
308        bool is_points;
309        bool is_lines;
310        bool alpha_test;
311        bool point_coord_upper_left;
312        bool light_twoside;
313        uint8_t alpha_test_func;
314        uint8_t logicop_func;
315        uint32_t point_sprite_mask;
316
317        struct pipe_rt_blend_state blend;
318};
319
320struct vc4_vs_key {
321        struct vc4_key base;
322
323        /**
324         * This is a proxy for the array of FS input semantics, which is
325         * larger than we would want to put in the key.
326         */
327        uint64_t compiled_fs_id;
328
329        enum pipe_format attr_formats[8];
330        bool is_coord;
331        bool per_vertex_point_size;
332};
333
334struct vc4_compile {
335        struct vc4_context *vc4;
336        nir_shader *s;
337        nir_function_impl *impl;
338        struct exec_list *cf_node_list;
339
340        /**
341         * Mapping from nir_register * or nir_ssa_def * to array of struct
342         * qreg for the values.
343         */
344        struct hash_table *def_ht;
345
346        /* For each temp, the instruction generating its value. */
347        struct qinst **defs;
348        uint32_t defs_array_size;
349
350        /**
351         * Inputs to the shader, arranged by TGSI declaration order.
352         *
353         * Not all fragment shader QFILE_VARY reads are present in this array.
354         */
355        struct qreg *inputs;
356        struct qreg *outputs;
357        uint32_t inputs_array_size;
358        uint32_t outputs_array_size;
359        uint32_t uniforms_array_size;
360
361        struct vc4_compiler_ubo_range *ubo_ranges;
362        uint32_t ubo_ranges_array_size;
363        /** Number of uniform areas declared in ubo_ranges. */
364        uint32_t num_uniform_ranges;
365        /** Number of uniform areas used for indirect addressed loads. */
366        uint32_t num_ubo_ranges;
367        uint32_t next_ubo_dst_offset;
368
369        struct qreg line_x, point_x, point_y;
370        struct qreg discard;
371
372        uint8_t vattr_sizes[8];
373
374        /**
375         * Array of the TGSI semantics of all FS QFILE_VARY reads.
376         *
377         * This includes those that aren't part of the VPM varyings, like
378         * point/line coordinates.
379         */
380        struct vc4_varying_semantic *input_semantics;
381        uint32_t num_input_semantics;
382        uint32_t input_semantics_array_size;
383
384        /**
385         * An entry per outputs[] in the VS indicating what the semantic of
386         * the output is.  Used to emit from the VS in the order that the FS
387         * needs.
388         */
389        struct vc4_varying_semantic *output_semantics;
390
391        struct pipe_shader_state *shader_state;
392        struct vc4_key *key;
393        struct vc4_fs_key *fs_key;
394        struct vc4_vs_key *vs_key;
395
396        uint32_t *uniform_data;
397        enum quniform_contents *uniform_contents;
398        uint32_t uniform_array_size;
399        uint32_t num_uniforms;
400        uint32_t num_outputs;
401        uint32_t num_texture_samples;
402        uint32_t output_position_index;
403        uint32_t output_clipvertex_index;
404        uint32_t output_color_index;
405        uint32_t output_point_size_index;
406
407        struct qreg undef;
408        enum qstage stage;
409        uint32_t num_temps;
410        struct list_head instructions;
411        uint32_t immediates[1024];
412
413        struct list_head qpu_inst_list;
414        uint64_t *qpu_insts;
415        uint32_t qpu_inst_count;
416        uint32_t qpu_inst_size;
417        uint32_t num_inputs;
418
419        uint32_t program_id;
420        uint32_t variant_id;
421};
422
/* Special nir_load_input intrinsic index for loading the current TLB
 * destination color.
 */
#define VC4_NIR_TLB_COLOR_READ_INPUT		2000000000

/* Special offset for nir_load_uniform values to get a QUNIFORM_*
 * state-dependent value.
 *
 * This has the same numeric value as VC4_NIR_TLB_COLOR_READ_INPUT, but the
 * two apply to different intrinsics (load_uniform vs. load_input).
 */
#define VC4_NIR_STATE_UNIFORM_OFFSET		2000000000

/* Compile-context lifetime. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);

/* Instruction construction, with two or four source operands.  The helpers
 * in this header pass c->undef for operands an opcode does not use.
 */
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
struct qinst *qir_inst4(enum qop op, struct qreg dst,
                        struct qreg a,
                        struct qreg b,
                        struct qreg c,
                        struct qreg d);
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);

/* Returns a register for a uniform of the given kind and 32-bit data word;
 * see qir_uniform_ui() and qir_uniform_f() for the constant cases.
 */
struct qreg qir_uniform(struct vc4_compile *c,
                        enum quniform_contents contents,
                        uint32_t data);
void qir_reorder_uniforms(struct vc4_compile *c);

/* Adds inst to the compile.  NOTE(review): presumably also records it in
 * c->defs, unlike qir_emit_nodef() -- confirm in the implementation.
 */
void qir_emit(struct vc4_compile *c, struct qinst *inst);
449static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
450{
451        list_addtail(&inst->link, &c->instructions);
452}
453
454struct qreg qir_get_temp(struct vc4_compile *c);
455int qir_get_op_nsrc(enum qop qop);
456bool qir_reg_equals(struct qreg a, struct qreg b);
457bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
458bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
459bool qir_is_multi_instruction(struct qinst *inst);
460bool qir_is_mul(struct qinst *inst);
461bool qir_is_tex(struct qinst *inst);
462bool qir_depends_on_flags(struct qinst *inst);
463bool qir_writes_r4(struct qinst *inst);
464bool qir_src_needs_a_file(struct qinst *inst);
465struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
466
467void qir_dump(struct vc4_compile *c);
468void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
469const char *qir_get_stage_name(enum qstage stage);
470
471void qir_optimize(struct vc4_compile *c);
472bool qir_opt_algebraic(struct vc4_compile *c);
473bool qir_opt_constant_folding(struct vc4_compile *c);
474bool qir_opt_copy_propagation(struct vc4_compile *c);
475bool qir_opt_cse(struct vc4_compile *c);
476bool qir_opt_dead_code(struct vc4_compile *c);
477bool qir_opt_small_immediates(struct vc4_compile *c);
478bool qir_opt_vpm_writes(struct vc4_compile *c);
479void vc4_nir_lower_blend(struct vc4_compile *c);
480void vc4_nir_lower_io(struct vc4_compile *c);
481nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
482                                       enum quniform_contents contents);
483nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
484                                          nir_ssa_def **srcs, int swiz);
485void qir_lower_uniforms(struct vc4_compile *c);
486
487void qpu_schedule_instructions(struct vc4_compile *c);
488
489void qir_SF(struct vc4_compile *c, struct qreg src);
490
491static inline struct qreg
492qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
493{
494        return qir_uniform(c, QUNIFORM_CONSTANT, ui);
495}
496
497static inline struct qreg
498qir_uniform_f(struct vc4_compile *c, float f)
499{
500        return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
501}
502
/* Defines qir_<name>(c): emits QOP_<name> with no sources (both source
 * slots are c->undef) into a fresh temporary and returns that temporary.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef));        \
        return t;                                                        \
}

/* Defines two helpers for a one-source op:
 *
 *   qir_<name>(c, a)            emits QOP_<name> into a fresh temporary via
 *                               qir_emit() and returns that temporary.
 *   qir_<name>_dest(c, dest, a) emits QOP_<name> into the caller's dest via
 *                               qir_emit_nodef().
 *
 * The unused second source slot is c->undef.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, a, c->undef));               \
        return t;                                                        \
}                                                                        \
static inline void                                                       \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a)                                         \
{                                                                        \
        qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef));      \
}

/* Defines two helpers for a two-source op:
 *
 *   qir_<name>(c, a, b)            emits QOP_<name> into a fresh temporary
 *                                  via qir_emit() and returns it.
 *   qir_<name>_dest(c, dest, a, b) emits QOP_<name> into the caller's dest
 *                                  via qir_emit_nodef().
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, a, b));                      \
        return t;                                                        \
}                                                                        \
static inline void                                                       \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a, struct qreg b)                          \
{                                                                        \
        qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, b));             \
}

/* Defines qir_<name>(c, a): emits QOP_<name> with one source and c->undef
 * as the destination; returns nothing.
 */
#define QIR_NODST_1(name)                                                \
static inline void                                                       \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef));        \
}

/* Defines qir_<name>(c, a, b): emits QOP_<name> with two sources and
 * c->undef as the destination; returns nothing.
 */
#define QIR_NODST_2(name)                                                \
static inline void                                                       \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));               \
}

/* Defines qir_<name>(c, dest, a): emits QOP_<name> from a into the
 * caller-supplied dest via qir_emit_nodef(), and returns dest so calls can
 * be chained on the same register.
 */
#define QIR_PACK(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a)       \
{                                                                        \
        qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef));      \
        return dest;                                                     \
}

564QIR_ALU1(MOV)
565QIR_ALU2(FADD)
566QIR_ALU2(FSUB)
567QIR_ALU2(FMUL)
568QIR_ALU2(MUL24)
569QIR_ALU1(SEL_X_0_ZS)
570QIR_ALU1(SEL_X_0_ZC)
571QIR_ALU1(SEL_X_0_NS)
572QIR_ALU1(SEL_X_0_NC)
573QIR_ALU2(SEL_X_Y_ZS)
574QIR_ALU2(SEL_X_Y_ZC)
575QIR_ALU2(SEL_X_Y_NS)
576QIR_ALU2(SEL_X_Y_NC)
577QIR_ALU2(FMIN)
578QIR_ALU2(FMAX)
579QIR_ALU2(FMINABS)
580QIR_ALU2(FMAXABS)
581QIR_ALU1(FTOI)
582QIR_ALU1(ITOF)
583
584QIR_ALU2(ADD)
585QIR_ALU2(SUB)
586QIR_ALU2(SHL)
587QIR_ALU2(SHR)
588QIR_ALU2(ASR)
589QIR_ALU2(MIN)
590QIR_ALU2(MAX)
591QIR_ALU2(AND)
592QIR_ALU2(OR)
593QIR_ALU2(XOR)
594QIR_ALU1(NOT)
595
596QIR_ALU1(RCP)
597QIR_ALU1(RSQ)
598QIR_ALU1(EXP2)
599QIR_ALU1(LOG2)
600QIR_ALU1(PACK_8888_F)
601QIR_PACK(PACK_8A_F)
602QIR_PACK(PACK_8B_F)
603QIR_PACK(PACK_8C_F)
604QIR_PACK(PACK_8D_F)
605QIR_ALU1(VARY_ADD_C)
606QIR_NODST_2(TEX_S)
607QIR_NODST_2(TEX_T)
608QIR_NODST_2(TEX_R)
609QIR_NODST_2(TEX_B)
610QIR_NODST_2(TEX_DIRECT)
611QIR_ALU0(FRAG_X)
612QIR_ALU0(FRAG_Y)
613QIR_ALU0(FRAG_Z)
614QIR_ALU0(FRAG_W)
615QIR_ALU0(FRAG_REV_FLAG)
616QIR_ALU0(TEX_RESULT)
617QIR_ALU0(TLB_COLOR_READ)
618QIR_NODST_1(TLB_COLOR_WRITE)
619QIR_NODST_1(TLB_Z_WRITE)
620QIR_NODST_1(TLB_DISCARD_SETUP)
621QIR_NODST_1(TLB_STENCIL_SETUP)
622
623static inline struct qreg
624qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
625{
626        struct qreg t = qir_get_temp(c);
627        qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
628        return t;
629}
630
631static inline struct qreg
632qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
633{
634        struct qreg t = qir_get_temp(c);
635        qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
636        return t;
637}
638
639static inline struct qreg
640qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
641{
642        struct qreg t = qir_get_temp(c);
643        qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
644        return t;
645}
646
647static inline struct qreg
648qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
649{
650        struct qreg t = qir_get_temp(c);
651        qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
652        return t;
653}
654
655static inline struct qreg
656qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
657{
658        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
659        if (dest.file == QFILE_TEMP)
660                c->defs[dest.index] = NULL;
661        return dest;
662}
663
664static inline struct qreg
665qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
666{
667        return qir_EXP2(c, qir_FMUL(c,
668                                    y,
669                                    qir_LOG2(c, x)));
670}
671
672static inline void
673qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
674{
675        static const struct qreg vpm = { QFILE_VPM, 0 };
676        qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
677}
678
679#endif /* VC4_QIR_H */
680