vc4_qir.h revision 73e2d4837d7e4611f31532ab0ccc14369341e0cb
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC4_QIR_H
25#define VC4_QIR_H
26
27#include <assert.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <stdbool.h>
31#include <stdint.h>
32#include <string.h>
33
34#include "util/macros.h"
35#include "glsl/nir/nir.h"
36#include "util/simple_list.h"
37#include "util/u_math.h"
38
/**
 * Register files that a QIR operand (struct qreg) can name.
 */
enum qfile {
        /* NOTE(review): presumably the file of "no register" operands such
         * as c->undef -- confirm in the implementation. */
        QFILE_NULL,
        /* NOTE(review): presumably the file of registers handed out by
         * qir_get_temp() -- confirm. */
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        /* Used as the destination register file by qir_VPM_WRITE(). */
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};
52
/**
 * A QIR operand: a register file plus a 32-bit index.
 *
 * For QFILE_SMALL_IMM the index field carries the immediate value itself
 * rather than a register number.
 */
struct qreg {
        enum qfile file;
        uint32_t index;
};
57
/**
 * QIR opcodes.
 *
 * Several inline helpers in this header select an opcode by adding a small
 * integer to the first member of a family (e.g. QOP_UNPACK_8A_F + i), so the
 * members of each such family must stay contiguous and in order.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        /* Note: Orderings of these compares must be the same as in
         * qpu_defines.h.  Selects the src[0] if the flag condition named by
         * the opcode suffix holds, otherwise 0.
         * NOTE(review): the suffixes presumably mean Z set / Z clear /
         * N set / N clear -- confirm against qpu_defines.h. */
        QOP_SEL_X_0_ZS,
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
        /* Selects the src[0] if the flag condition named by the opcode
         * suffix holds, otherwise src[1]. */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_PACK_SCALED,
        QOP_PACK_8888_F,
        /* qir_PACK_8_F() indexes this family as QOP_PACK_8A_F + chan. */
        QOP_PACK_8A_F,
        QOP_PACK_8B_F,
        QOP_PACK_8C_F,
        QOP_PACK_8D_F,
        QOP_TLB_DISCARD_SETUP,
        QOP_TLB_STENCIL_SETUP,
        QOP_TLB_Z_WRITE,
        QOP_TLB_COLOR_WRITE,
        QOP_TLB_COLOR_READ,
        QOP_VARY_ADD_C,

        QOP_FRAG_X,
        QOP_FRAG_Y,
        QOP_FRAG_Z,
        QOP_FRAG_W,
        QOP_FRAG_REV_FLAG,

        /* qir_UNPACK_8_F() / qir_UNPACK_16_F() index these families as
         * QOP_UNPACK_8A_F + i and QOP_UNPACK_16A_F + i. */
        QOP_UNPACK_8A_F,
        QOP_UNPACK_8B_F,
        QOP_UNPACK_8C_F,
        QOP_UNPACK_8D_F,
        QOP_UNPACK_16A_F,
        QOP_UNPACK_16B_F,

        /* qir_UNPACK_8_I() / qir_UNPACK_16_I() index these families as
         * QOP_UNPACK_8A_I + i and QOP_UNPACK_16A_I + i. */
        QOP_UNPACK_8A_I,
        QOP_UNPACK_8B_I,
        QOP_UNPACK_8C_I,
        QOP_UNPACK_8D_I,
        QOP_UNPACK_16A_I,
        QOP_UNPACK_16B_I,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,
        /* qir_R4_UNPACK() indexes this family as QOP_R4_UNPACK_A + i. */
        QOP_R4_UNPACK_A,
        QOP_R4_UNPACK_B,
        QOP_R4_UNPACK_C,
        QOP_R4_UNPACK_D
};
163
/**
 * One 64-bit instruction word carrying a simple_list link.
 *
 * NOTE(review): presumably chained on vc4_compile::qpu_inst_list -- confirm
 * in the code generator.
 */
struct queued_qpu_inst {
        struct simple_node link;
        uint64_t inst;
};
168
/**
 * A single QIR instruction: an opcode, a destination register and a
 * pointer to its source registers.
 */
struct qinst {
        /* List linkage (simple_list). */
        struct simple_node link;

        enum qop op;
        struct qreg dst;
        /* Source operands.  NOTE(review): presumably an array with
         * qir_get_op_nsrc(op) entries allocated by qir_inst()/qir_inst4()
         * -- confirm in the implementation. */
        struct qreg *src;
        /* NOTE(review): presumably "set flags" for this instruction (see
         * qir_SF()) -- confirm. */
        bool sf;
};
177
/**
 * Shader stage a vc4_compile is building (stored in vc4_compile::stage).
 */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        /* Vertex shader. */
        QSTAGE_VERT,
        /* Fragment shader. */
        QSTAGE_FRAG,
};
187
/**
 * Describes what a uniform slot holds.  Passed to qir_uniform() together
 * with a 32-bit data word and stored in vc4_compile::uniform_contents.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR,
        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
};
251
/**
 * Identifies one varying by semantic, index and swizzle, each stored in a
 * byte.  Used for vc4_compile::input_semantics and ::output_semantics.
 *
 * NOTE(review): the fields presumably hold a TGSI semantic name, its
 * semantic index and a channel number -- confirm against the users.
 */
struct vc4_varying_semantic {
        uint8_t semantic;
        uint8_t index;
        uint8_t swizzle;
};
257
/**
 * One range of uniform data that may be uploaded to a UBO for indirect
 * access.  vc4_compile::ubo_ranges points at an array of these.
 */
struct vc4_compiler_ubo_range {
        /**
         * Offset in bytes from the start of the UBO where this range is
         * uploaded.
         *
         * Only valid once the "used" field below has been set.
         */
        uint32_t dst_offset;

        /**
         * Offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** Size in bytes of this UBO range. */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};
282
/**
 * State for compiling one shader.  Every qir_* entry point declared in this
 * header takes a pointer to this structure.
 */
struct vc4_compile {
        struct vc4_context *vc4;
        /* NIR shader, function implementation and control-flow node list.
         * NOTE(review): presumably the shader currently being translated to
         * QIR -- confirm in the NIR-to-QIR code. */
        nir_shader *s;
        nir_function_impl *impl;
        struct exec_list *cf_node_list;

        /**
         * Mapping from nir_register * or nir_ssa_def * to array of struct
         * qreg for the values.
         */
        struct hash_table *def_ht;

        /* For each temp, the instruction generating its value. */
        struct qinst **defs;
        uint32_t defs_array_size;

        /**
         * Inputs to the shader, arranged by TGSI declaration order.
         *
         * Not all fragment shader QFILE_VARY reads are present in this array.
         */
        struct qreg *inputs;
        struct qreg *outputs;
        uint32_t inputs_array_size;
        uint32_t outputs_array_size;
        /* NOTE(review): a separate uniform_array_size field exists further
         * down; check which of the two sizes the uniform arrays use. */
        uint32_t uniforms_array_size;

        struct vc4_compiler_ubo_range *ubo_ranges;
        uint32_t ubo_ranges_array_size;
        /** Number of uniform areas declared in ubo_ranges. */
        uint32_t num_uniform_ranges;
        /** Number of uniform areas used for indirect addressed loads. */
        uint32_t num_ubo_ranges;
        uint32_t next_ubo_dst_offset;

        struct qreg line_x, point_x, point_y;
        struct qreg discard;

        uint8_t vattr_sizes[8];

        /**
         * Array of the TGSI semantics of all FS QFILE_VARY reads.
         *
         * This includes those that aren't part of the VPM varyings, like
         * point/line coordinates.
         */
        struct vc4_varying_semantic *input_semantics;
        uint32_t num_input_semantics;
        uint32_t input_semantics_array_size;

        /**
         * An entry per outputs[] in the VS indicating what the semantic of
         * the output is.  Used to emit from the VS in the order that the FS
         * needs.
         */
        struct vc4_varying_semantic *output_semantics;

        struct pipe_shader_state *shader_state;
        struct vc4_key *key;
        struct vc4_fs_key *fs_key;
        struct vc4_vs_key *vs_key;

        /* Uniform data words and their content kinds.
         * NOTE(review): presumably parallel arrays with num_uniforms valid
         * entries, appended to by qir_uniform() -- confirm. */
        uint32_t *uniform_data;
        enum quniform_contents *uniform_contents;
        uint32_t uniform_array_size;
        uint32_t num_uniforms;
        uint32_t num_outputs;
        uint32_t num_texture_samples;
        uint32_t output_position_index;
        uint32_t output_clipvertex_index;
        uint32_t output_color_index;
        uint32_t output_point_size_index;

        /* Register passed by the inline emit helpers in this header for
         * every unused source or destination slot. */
        struct qreg undef;
        enum qstage stage;
        uint32_t num_temps;
        /* NOTE(review): presumably the list head of struct qinst entries
         * appended by qir_emit() -- confirm. */
        struct simple_node instructions;
        uint32_t immediates[1024];

        /* NOTE(review): presumably the list head of struct queued_qpu_inst
         * entries -- confirm in the code generator. */
        struct simple_node qpu_inst_list;
        uint64_t *qpu_insts;
        uint32_t qpu_inst_count;
        uint32_t qpu_inst_size;
        uint32_t num_inputs;

        uint32_t program_id;
        uint32_t variant_id;
};
371
/*
 * Entry points implemented outside this header.  Only the signatures are
 * visible here; the notes below are limited to what the signatures and the
 * inline helpers in this header show.
 */
/* Creation and teardown of a compile context. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);
/* Instruction construction: qir_inst() takes two source operands and
 * qir_inst4() takes four.  The inline helpers in this header pass the
 * result of qir_inst() straight to qir_emit(). */
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
struct qinst *qir_inst4(enum qop op, struct qreg dst,
                        struct qreg a,
                        struct qreg b,
                        struct qreg c,
                        struct qreg d);
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
/* Returns a register for a uniform given its contents kind and a 32-bit
 * data word; qir_uniform_ui()/qir_uniform_f() wrap it for constants. */
struct qreg qir_uniform(struct vc4_compile *c,
                        enum quniform_contents contents,
                        uint32_t data);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
/* Returns the register the inline emit helpers use as their destination. */
struct qreg qir_get_temp(struct vc4_compile *c);
/* Queries about opcodes, registers and instructions. */
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_reads_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);

/* Debug output. */
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
const char *qir_get_stage_name(enum qstage stage);

/* Optimization and lowering passes.
 * NOTE(review): the bool results presumably report whether the pass changed
 * anything -- confirm in the pass implementations. */
void qir_optimize(struct vc4_compile *c);
bool qir_opt_algebraic(struct vc4_compile *c);
bool qir_opt_constant_folding(struct vc4_compile *c);
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);

void qpu_schedule_instructions(struct vc4_compile *c);

void qir_SF(struct vc4_compile *c, struct qreg src);
417
418static inline struct qreg
419qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
420{
421        return qir_uniform(c, QUNIFORM_CONSTANT, ui);
422}
423
424static inline struct qreg
425qir_uniform_f(struct vc4_compile *c, float f)
426{
427        return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
428}
429
/**
 * Defines qir_<name>(c): emits a QOP_<name> instruction with both source
 * slots set to c->undef, writing a fresh temporary, and returns that
 * temporary.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg dst = qir_get_temp(c);                               \
        struct qinst *inst = qir_inst(QOP_##name, dst,                   \
                                      c->undef, c->undef);               \
        qir_emit(c, inst);                                               \
        return dst;                                                      \
}
438
/**
 * Defines qir_<name>(c, a): emits a QOP_<name> instruction reading "a"
 * (second source slot is c->undef), writing a fresh temporary, and returns
 * that temporary.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qreg dst = qir_get_temp(c);                               \
        struct qinst *inst = qir_inst(QOP_##name, dst, a, c->undef);     \
        qir_emit(c, inst);                                               \
        return dst;                                                      \
}
447
/**
 * Defines qir_<name>(c, a, b): emits a QOP_<name> instruction reading "a"
 * and "b", writing a fresh temporary, and returns that temporary.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qreg dst = qir_get_temp(c);                               \
        struct qinst *inst = qir_inst(QOP_##name, dst, a, b);            \
        qir_emit(c, inst);                                               \
        return dst;                                                      \
}
456
/**
 * Defines qir_<name>(c, a): emits a QOP_<name> instruction reading "a" with
 * c->undef as both its destination and its second source.  Returns nothing.
 */
#define QIR_NODST_1(name)                                               \
static inline void                                                      \
qir_##name(struct vc4_compile *c, struct qreg a)                        \
{                                                                       \
        struct qinst *inst = qir_inst(QOP_##name, c->undef,             \
                                      a, c->undef);                     \
        qir_emit(c, inst);                                              \
}
463
/**
 * Defines qir_<name>(c, a, b): emits a QOP_<name> instruction reading "a"
 * and "b" with c->undef as its destination.  Returns nothing.
 */
#define QIR_NODST_2(name)                                               \
static inline void                                                      \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
{                                                                       \
        struct qinst *inst = qir_inst(QOP_##name, c->undef, a, b);      \
        qir_emit(c, inst);                                              \
}
470
/*
 * Instantiate the qir_<NAME>() emit helpers.  QIR_ALU0/1/2 helpers take
 * zero/one/two source registers and return a new temporary; QIR_NODST_1/2
 * helpers take one/two source registers and return nothing.
 */
/* Moves, float arithmetic, flag-based selects and float/int conversion. */
QIR_ALU1(MOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
QIR_ALU2(MUL24)
QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
QIR_ALU1(SEL_X_0_NS)
QIR_ALU1(SEL_X_0_NC)
QIR_ALU2(SEL_X_Y_ZS)
QIR_ALU2(SEL_X_Y_ZC)
QIR_ALU2(SEL_X_Y_NS)
QIR_ALU2(SEL_X_Y_NC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
QIR_ALU2(FMAXABS)
QIR_ALU1(FTOI)
QIR_ALU1(ITOF)

/* Integer arithmetic, shifts and bitwise logic. */
QIR_ALU2(ADD)
QIR_ALU2(SUB)
QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
QIR_ALU2(XOR)
QIR_ALU1(NOT)

/* RCP/RSQ/EXP2/LOG2, packing, varyings, texturing, fragment payload and
 * TLB access. */
QIR_ALU1(RCP)
QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU2(PACK_SCALED)
QIR_ALU1(PACK_8888_F)
QIR_ALU2(PACK_8A_F)
QIR_ALU2(PACK_8B_F)
QIR_ALU2(PACK_8C_F)
QIR_ALU2(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
QIR_NODST_2(TEX_R)
QIR_NODST_2(TEX_B)
QIR_NODST_2(TEX_DIRECT)
QIR_ALU0(FRAG_X)
QIR_ALU0(FRAG_Y)
QIR_ALU0(FRAG_Z)
QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
529
530static inline struct qreg
531qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
532{
533        struct qreg t = qir_get_temp(c);
534        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
535        return t;
536}
537
538static inline struct qreg
539qir_SEL_X_0_COND(struct vc4_compile *c, int i)
540{
541        struct qreg t = qir_get_temp(c);
542        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
543        return t;
544}
545
546static inline struct qreg
547qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
548{
549        struct qreg t = qir_get_temp(c);
550        qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
551        return t;
552}
553
554static inline struct qreg
555qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
556{
557        struct qreg t = qir_get_temp(c);
558        qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
559        return t;
560}
561
562static inline struct qreg
563qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
564{
565        struct qreg t = qir_get_temp(c);
566        qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
567        return t;
568}
569
570static inline struct qreg
571qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
572{
573        struct qreg t = qir_get_temp(c);
574        qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
575        return t;
576}
577
578static inline struct qreg
579qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
580{
581        struct qreg t = qir_get_temp(c);
582        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
583        return t;
584}
585
586static inline struct qreg
587qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
588{
589        return qir_EXP2(c, qir_FMUL(c,
590                                    y,
591                                    qir_LOG2(c, x)));
592}
593
594static inline void
595qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
596{
597        static const struct qreg vpm = { QFILE_VPM, 0 };
598        qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
599}
600
601#endif /* VC4_QIR_H */
602