vc4_qir.h revision 8c5dcdbccb68b73d2856d9c1faafadc536e682e3
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC4_QIR_H
25#define VC4_QIR_H
26
27#include <assert.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <stdbool.h>
31#include <stdint.h>
32#include <string.h>
33
34#include "util/macros.h"
35#include "util/simple_list.h"
36#include "util/u_math.h"
37#include "tgsi/tgsi_parse.h"
38
/**
 * Register files that a struct qreg can refer to (see qreg::file).
 */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        /* Used as the destination file by qir_VPM_WRITE(). */
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};
52
/**
 * A QIR register reference: a register file plus an index within that file.
 *
 * Passed and returned by value throughout the QIR builder helpers.
 */
struct qreg {
        /** Register file this reference names. */
        enum qfile file;
        /**
         * Index within the file.  For QFILE_SMALL_IMM this field stores the
         * immediate value itself.
         */
        uint32_t index;
};
57
/**
 * QIR opcodes.
 *
 * Several inline helpers in this header pick an opcode by adding a small
 * index to the first member of a group (QOP_R4_UNPACK_A + i,
 * QOP_UNPACK_8A_F + i, QOP_UNPACK_8A_I + i, QOP_UNPACK_16A_F + i,
 * QOP_UNPACK_16A_I + i, QOP_PACK_8A_F + chan), so the members of those
 * groups must stay contiguous and in their current order.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        /* Note: Orderings of these compares must be the same as in
         * qpu_defines.h.  Selects src[0] if the flag condition named by the
         * opcode suffix holds, otherwise 0.
         *
         * NOTE(review): the suffixes presumably mean zero/negative flag
         * set/clear (ZS, ZC, NS, NC) -- confirm against qpu_defines.h. */
        QOP_SEL_X_0_ZS,
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
        /* Selects src[0] if the flag condition named by the opcode suffix
         * holds, otherwise src[1]. */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_PACK_SCALED,
        QOP_PACK_8888_F,
        /* Indexed as QOP_PACK_8A_F + chan by qir_PACK_8_F(). */
        QOP_PACK_8A_F,
        QOP_PACK_8B_F,
        QOP_PACK_8C_F,
        QOP_PACK_8D_F,
        QOP_TLB_DISCARD_SETUP,
        QOP_TLB_STENCIL_SETUP,
        QOP_TLB_Z_WRITE,
        QOP_TLB_COLOR_WRITE,
        QOP_TLB_COLOR_READ,
        QOP_VARY_ADD_C,

        QOP_FRAG_X,
        QOP_FRAG_Y,
        QOP_FRAG_Z,
        QOP_FRAG_W,
        QOP_FRAG_REV_FLAG,

        /* Indexed as QOP_UNPACK_8A_F + i by qir_UNPACK_8_F(). */
        QOP_UNPACK_8A_F,
        QOP_UNPACK_8B_F,
        QOP_UNPACK_8C_F,
        QOP_UNPACK_8D_F,
        /* Indexed as QOP_UNPACK_16A_F + i by qir_UNPACK_16_F(). */
        QOP_UNPACK_16A_F,
        QOP_UNPACK_16B_F,

        /* Indexed as QOP_UNPACK_8A_I + i by qir_UNPACK_8_I(). */
        QOP_UNPACK_8A_I,
        QOP_UNPACK_8B_I,
        QOP_UNPACK_8C_I,
        QOP_UNPACK_8D_I,
        /* Indexed as QOP_UNPACK_16A_I + i by qir_UNPACK_16_I(). */
        QOP_UNPACK_16A_I,
        QOP_UNPACK_16B_I,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,
        /* Indexed as QOP_R4_UNPACK_A + i by qir_R4_UNPACK(). */
        QOP_R4_UNPACK_A,
        QOP_R4_UNPACK_B,
        QOP_R4_UNPACK_C,
        QOP_R4_UNPACK_D
};
163
/**
 * List node holding one 64-bit QPU instruction word.
 *
 * NOTE(review): presumably linked on vc4_compile::qpu_inst_list -- confirm
 * against the code that queues instructions.
 */
struct queued_qpu_inst {
        struct simple_node link;
        /** The encoded instruction. */
        uint64_t inst;
};
168
/**
 * One QIR instruction.
 *
 * Instances are created by qir_inst()/qir_inst4() and handed to qir_emit().
 * NOTE(review): presumably linked on vc4_compile::instructions through
 * `link` -- confirm against qir_emit().
 */
struct qinst {
        struct simple_node link;

        /** Operation this instruction performs. */
        enum qop op;
        /** Destination register. */
        struct qreg dst;
        /**
         * Source operands.  The array length is not visible from this
         * header.  NOTE(review): presumably sized by the allocator in
         * qir_inst()/qir_inst4(); see also qir_get_op_nsrc() -- confirm.
         */
        struct qreg *src;
        /** NOTE(review): presumably "set flags" (cf. qir_SF()) -- confirm. */
        bool sf;
};
177
/**
 * Shader stage being compiled; stored in vc4_compile::stage and accepted by
 * qir_get_stage_name().
 */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};
187
/**
 * Describes what a uniform slot holds.
 *
 * Passed to qir_uniform() together with a 32-bit data word, and stored per
 * uniform in vc4_compile::uniform_contents.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR,
        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
};
251
/**
 * Semantic description of one varying; used for the elements of
 * vc4_compile::input_semantics and vc4_compile::output_semantics.
 *
 * NOTE(review): the fields presumably hold a TGSI semantic name, its
 * semantic index and a channel selector -- confirm against the users.
 */
struct vc4_varying_semantic {
        uint8_t semantic;
        uint8_t index;
        uint8_t swizzle;
};
257
/**
 * One contiguous range of gallium uniform data that may be uploaded into a
 * UBO; elements of vc4_compile::ubo_ranges.
 */
struct vc4_compiler_ubo_range {
        /**
         * offset in bytes from the start of the ubo where this range is
         * uploaded.
         *
         * Only set once used is set.
         */
        uint32_t dst_offset;

        /**
         * offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** size in bytes of this ubo range */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};
282
/**
 * Per-shader compile state.
 *
 * Every QIR builder, optimization pass and dump routine declared in this
 * header takes a pointer to this structure as its first argument.
 */
struct vc4_compile {
        struct vc4_context *vc4;
        struct tgsi_parse_context parser;
        struct qreg *temps;
        /* For each temp, the instruction generating its value. */
        struct qinst **defs;
        uint32_t defs_array_size;
        /**
         * Inputs to the shader, arranged by TGSI declaration order.
         *
         * Not all fragment shader QFILE_VARY reads are present in this array.
         */
        struct qreg *inputs;
        struct qreg *outputs;
        struct qreg *consts;
        struct qreg addr[4]; /* TGSI ARL destination. */
        uint32_t temps_array_size;
        uint32_t inputs_array_size;
        uint32_t outputs_array_size;
        /* NOTE(review): both uniforms_array_size (here) and
         * uniform_array_size (further down) exist; one of them looks
         * redundant -- confirm which one the implementation uses. */
        uint32_t uniforms_array_size;
        uint32_t consts_array_size;
        uint32_t num_consts;

        /* UBO range bookkeeping; see struct vc4_compiler_ubo_range. */
        struct vc4_compiler_ubo_range *ubo_ranges;
        uint32_t ubo_ranges_array_size;
        uint32_t num_ubo_ranges;
        uint32_t next_ubo_dst_offset;

        struct qreg line_x, point_x, point_y;
        struct qreg discard;

        uint8_t vattr_sizes[8];

        /**
         * Array of the TGSI semantics of all FS QFILE_VARY reads.
         *
         * This includes those that aren't part of the VPM varyings, like
         * point/line coordinates.
         */
        struct vc4_varying_semantic *input_semantics;
        uint32_t num_input_semantics;
        uint32_t input_semantics_array_size;

        /**
         * An entry per outputs[] in the VS indicating what the semantic of
         * the output is.  Used to emit from the VS in the order that the FS
         * needs.
         */
        struct vc4_varying_semantic *output_semantics;

        struct pipe_shader_state *shader_state;
        struct vc4_key *key;
        struct vc4_fs_key *fs_key;
        struct vc4_vs_key *vs_key;

        /* Parallel per-uniform arrays: the 32-bit data word and what kind of
         * content it describes (see enum quniform_contents). */
        uint32_t *uniform_data;
        enum quniform_contents *uniform_contents;
        uint32_t uniform_array_size;
        uint32_t num_uniforms;
        uint32_t num_outputs;
        uint32_t num_texture_samples;
        uint32_t output_position_index;
        uint32_t output_clipvertex_index;
        uint32_t output_color_index;
        uint32_t output_point_size_index;

        /* Placeholder register that the inline builders in this header pass
         * for unused source and destination slots. */
        struct qreg undef;
        enum qstage stage;
        uint32_t num_temps;
        struct simple_node instructions;
        uint32_t immediates[1024];

        struct simple_node qpu_inst_list;
        uint64_t *qpu_insts;
        uint32_t qpu_inst_count;
        uint32_t qpu_inst_size;
        uint32_t num_inputs;

        uint32_t program_id;
        uint32_t variant_id;
};
364
/*
 * Out-of-line QIR API.  Only declarations are visible here; the definitions
 * live outside this header, so the notes below are limited to what the
 * signatures and the inline helpers in this file show.
 */

/* Compile-context creation and teardown. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);

/*
 * Instruction construction.  The inline builders in this header create an
 * instruction with qir_inst(), append it with qir_emit(), and obtain
 * destination registers from qir_get_temp().
 */
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
struct qinst *qir_inst4(enum qop op, struct qreg dst,
                        struct qreg a,
                        struct qreg b,
                        struct qreg c,
                        struct qreg d);
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
/* Used by qir_uniform_ui()/qir_uniform_f() with QUNIFORM_CONSTANT. */
struct qreg qir_uniform(struct vc4_compile *c,
                        enum quniform_contents contents,
                        uint32_t data);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
struct qreg qir_get_temp(struct vc4_compile *c);

/* Queries on opcodes, registers and instructions. */
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_reads_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);

/* Debug dumping and naming. */
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
const char *qir_get_stage_name(enum qstage stage);

/*
 * Optimization and lowering passes.  NOTE(review): the bool-returning passes
 * presumably report whether they changed anything -- confirm against
 * qir_optimize().
 */
void qir_optimize(struct vc4_compile *c);
bool qir_opt_algebraic(struct vc4_compile *c);
bool qir_opt_constant_folding(struct vc4_compile *c);
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);

void qpu_schedule_instructions(struct vc4_compile *c);

void qir_SF(struct vc4_compile *c, struct qreg src);
410
411static inline struct qreg
412qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
413{
414        return qir_uniform(c, QUNIFORM_CONSTANT, ui);
415}
416
417static inline struct qreg
418qir_uniform_f(struct vc4_compile *c, float f)
419{
420        return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
421}
422
/**
 * Defines qir_<name>(c): takes a fresh temporary from qir_get_temp(), emits
 * QOP_<name> writing that temporary with both source slots set to c->undef,
 * and returns the temporary.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg dst = qir_get_temp(c);                               \
        struct qinst *inst = qir_inst(QOP_##name, dst,                   \
                                      c->undef, c->undef);               \
                                                                         \
        qir_emit(c, inst);                                               \
        return dst;                                                      \
}
431
/**
 * Defines qir_<name>(c, a): takes a fresh temporary from qir_get_temp(),
 * emits QOP_<name> writing that temporary with source 0 = a and source 1 =
 * c->undef, and returns the temporary.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qreg dst = qir_get_temp(c);                               \
        struct qinst *inst = qir_inst(QOP_##name, dst, a, c->undef);     \
                                                                         \
        qir_emit(c, inst);                                               \
        return dst;                                                      \
}
440
/**
 * Defines qir_<name>(c, a, b): takes a fresh temporary from qir_get_temp(),
 * emits QOP_<name> writing that temporary with sources a and b, and returns
 * the temporary.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qreg dst = qir_get_temp(c);                               \
        struct qinst *inst = qir_inst(QOP_##name, dst, a, b);            \
                                                                         \
        qir_emit(c, inst);                                               \
        return dst;                                                      \
}
449
/**
 * Defines qir_<name>(c, a): emits QOP_<name> with c->undef as destination,
 * source 0 = a and source 1 = c->undef.  Nothing is returned.
 */
#define QIR_NODST_1(name)                                               \
static inline void                                                      \
qir_##name(struct vc4_compile *c, struct qreg a)                        \
{                                                                       \
        struct qinst *inst = qir_inst(QOP_##name, c->undef,             \
                                      a, c->undef);                     \
                                                                        \
        qir_emit(c, inst);                                              \
}
456
/**
 * Defines qir_<name>(c, a, b): emits QOP_<name> with c->undef as destination
 * and sources a and b.  Nothing is returned.
 */
#define QIR_NODST_2(name)                                               \
static inline void                                                      \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
{                                                                       \
        struct qinst *inst = qir_inst(QOP_##name, c->undef, a, b);      \
                                                                        \
        qir_emit(c, inst);                                              \
}
463
/*
 * Builder instantiations.  Each line defines a static inline qir_<NAME>()
 * that emits QOP_<NAME>; the macro used fixes the arity:
 *   QIR_ALU0    - no sources, returns a new temporary
 *   QIR_ALU1    - one source, returns a new temporary
 *   QIR_ALU2    - two sources, returns a new temporary
 *   QIR_NODST_1 - one source, no destination
 *   QIR_NODST_2 - two sources, no destination
 *
 * The UNPACK_* and R4_UNPACK_* opcodes have no entry here; they are reached
 * through the indexed qir_UNPACK_*() / qir_R4_UNPACK() helpers instead.
 * QOP_VW_SETUP, QOP_VR_SETUP and QOP_TLB_COLOR_WRITE have no builder in this
 * list either.
 */
QIR_ALU1(MOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
QIR_ALU2(MUL24)
/* Conditional selects: the X_0 forms take one source, the X_Y forms two. */
QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
QIR_ALU1(SEL_X_0_NS)
QIR_ALU1(SEL_X_0_NC)
QIR_ALU2(SEL_X_Y_ZS)
QIR_ALU2(SEL_X_Y_ZC)
QIR_ALU2(SEL_X_Y_NS)
QIR_ALU2(SEL_X_Y_NC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
QIR_ALU2(FMAXABS)
QIR_ALU1(FTOI)
QIR_ALU1(ITOF)

QIR_ALU2(ADD)
QIR_ALU2(SUB)
QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
QIR_ALU2(XOR)
QIR_ALU1(NOT)

QIR_ALU1(RCP)
QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU2(PACK_SCALED)
QIR_ALU1(PACK_8888_F)
/* Also reachable by channel index through qir_PACK_8_F(). */
QIR_ALU2(PACK_8A_F)
QIR_ALU2(PACK_8B_F)
QIR_ALU2(PACK_8C_F)
QIR_ALU2(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
/* Texture parameter writes: two sources, no destination. */
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
QIR_NODST_2(TEX_R)
QIR_NODST_2(TEX_B)
QIR_NODST_2(TEX_DIRECT)
QIR_ALU0(FRAG_X)
QIR_ALU0(FRAG_Y)
QIR_ALU0(FRAG_Z)
QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
522
523static inline struct qreg
524qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
525{
526        struct qreg t = qir_get_temp(c);
527        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
528        return t;
529}
530
531static inline struct qreg
532qir_SEL_X_0_COND(struct vc4_compile *c, int i)
533{
534        struct qreg t = qir_get_temp(c);
535        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
536        return t;
537}
538
539static inline struct qreg
540qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
541{
542        struct qreg t = qir_get_temp(c);
543        qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
544        return t;
545}
546
547static inline struct qreg
548qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
549{
550        struct qreg t = qir_get_temp(c);
551        qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
552        return t;
553}
554
555static inline struct qreg
556qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
557{
558        struct qreg t = qir_get_temp(c);
559        qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
560        return t;
561}
562
563static inline struct qreg
564qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
565{
566        struct qreg t = qir_get_temp(c);
567        qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
568        return t;
569}
570
571static inline struct qreg
572qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
573{
574        struct qreg t = qir_get_temp(c);
575        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
576        return t;
577}
578
579static inline struct qreg
580qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
581{
582        return qir_EXP2(c, qir_FMUL(c,
583                                    y,
584                                    qir_LOG2(c, x)));
585}
586
587static inline void
588qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
589{
590        static const struct qreg vpm = { QFILE_VPM, 0 };
591        qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
592}
593
594#endif /* VC4_QIR_H */
595