vc4_qir.h revision 14dc281c1332518b6144718e1fb3845abbe23ff7
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC4_QIR_H
25#define VC4_QIR_H
26
27#include <assert.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <stdbool.h>
31#include <stdint.h>
32#include <string.h>
33
34#include "util/macros.h"
35#include "util/simple_list.h"
36#include "tgsi/tgsi_parse.h"
37
/**
 * Register file selector stored in struct qreg::file.
 *
 * The enumerators carry no explicit values, so they are numbered
 * consecutively starting at 0 for QFILE_NULL.
 */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};
51
52struct qreg {
53        enum qfile file;
54        uint32_t index;
55};
56
/**
 * QIR opcodes.
 *
 * The enumerators are numbered consecutively from 0 (QOP_UNDEF).  The
 * relative order inside several groups is load-bearing: the inline helpers
 * at the bottom of this header compute opcodes as "first opcode of the
 * group + i" for the R4_UNPACK_[A-D], UNPACK_8[A-D]_{F,I},
 * UNPACK_16[AB]_{F,I} and PACK_8[A-D]_F groups, so members of those groups
 * must stay adjacent and in order.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        /* Note: Orderings of these compares must be the same as in
         * qpu_defines.h.  Selects the src[0] if the ns flag bit is set,
         * otherwise 0. */
        QOP_SEL_X_0_ZS,
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
        /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_PACK_SCALED,
        QOP_PACK_8888_F,
        QOP_PACK_8A_F,
        QOP_PACK_8B_F,
        QOP_PACK_8C_F,
        QOP_PACK_8D_F,
        QOP_TLB_DISCARD_SETUP,
        QOP_TLB_STENCIL_SETUP,
        QOP_TLB_Z_WRITE,
        QOP_TLB_COLOR_WRITE,
        QOP_TLB_COLOR_READ,
        QOP_VARY_ADD_C,

        QOP_FRAG_X,
        QOP_FRAG_Y,
        QOP_FRAG_Z,
        QOP_FRAG_W,
        QOP_FRAG_REV_FLAG,

        QOP_UNPACK_8A_F,
        QOP_UNPACK_8B_F,
        QOP_UNPACK_8C_F,
        QOP_UNPACK_8D_F,
        QOP_UNPACK_16A_F,
        QOP_UNPACK_16B_F,

        QOP_UNPACK_8A_I,
        QOP_UNPACK_8B_I,
        QOP_UNPACK_8C_I,
        QOP_UNPACK_8D_I,
        QOP_UNPACK_16A_I,
        QOP_UNPACK_16B_I,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,
        QOP_R4_UNPACK_A,
        QOP_R4_UNPACK_B,
        QOP_R4_UNPACK_C,
        QOP_R4_UNPACK_D
};
162
163struct queued_qpu_inst {
164        struct simple_node link;
165        uint64_t inst;
166};
167
168struct qinst {
169        struct simple_node link;
170
171        enum qop op;
172        struct qreg dst;
173        struct qreg *src;
174        bool sf;
175};
176
/**
 * Shader stage being compiled; stored in vc4_compile::stage and accepted by
 * qir_get_stage_name().  Values are consecutive starting at 0.
 */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};
186
/**
 * Describes what a uniform slot holds; vc4_compile::uniform_contents is an
 * array of these.  Values are consecutive starting at 0.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR,
        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
};
250
/**
 * Identifies one varying by semantic, index and swizzle, each stored in a
 * single byte.
 *
 * Used for the vc4_compile::input_semantics and
 * vc4_compile::output_semantics arrays, whose comments describe the
 * semantics as TGSI semantics.
 */
struct vc4_varying_semantic {
        uint8_t semantic;
        uint8_t index;
        uint8_t swizzle;
};
256
/**
 * One contiguous byte range of uniform data that may be uploaded to a UBO
 * for indirect uniform access.  All offsets and sizes are in bytes.
 */
struct vc4_compiler_ubo_range {
        /**
         * offset in bytes from the start of the ubo where this range is
         * uploaded.
         *
         * Only set once used is set.
         */
        uint32_t dst_offset;

        /**
         * offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** size in bytes of this ubo range */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};
281
282struct vc4_compile {
283        struct vc4_context *vc4;
284        struct tgsi_parse_context parser;
285        struct qreg *temps;
286        /**
287         * Inputs to the shader, arranged by TGSI declaration order.
288         *
289         * Not all fragment shader QFILE_VARY reads are present in this array.
290         */
291        struct qreg *inputs;
292        struct qreg *outputs;
293        struct qreg *consts;
294        struct qreg addr[4]; /* TGSI ARL destination. */
295        uint32_t temps_array_size;
296        uint32_t inputs_array_size;
297        uint32_t outputs_array_size;
298        uint32_t uniforms_array_size;
299        uint32_t consts_array_size;
300        uint32_t num_consts;
301
302        struct vc4_compiler_ubo_range *ubo_ranges;
303        uint32_t ubo_ranges_array_size;
304        uint32_t num_ubo_ranges;
305        uint32_t next_ubo_dst_offset;
306
307        struct qreg line_x, point_x, point_y;
308        struct qreg discard;
309
310        uint8_t vattr_sizes[8];
311
312        /**
313         * Array of the TGSI semantics of all FS QFILE_VARY reads.
314         *
315         * This includes those that aren't part of the VPM varyings, like
316         * point/line coordinates.
317         */
318        struct vc4_varying_semantic *input_semantics;
319        uint32_t num_input_semantics;
320        uint32_t input_semantics_array_size;
321
322        /**
323         * An entry per outputs[] in the VS indicating what the semantic of
324         * the output is.  Used to emit from the VS in the order that the FS
325         * needs.
326         */
327        struct vc4_varying_semantic *output_semantics;
328
329        struct pipe_shader_state *shader_state;
330        struct vc4_key *key;
331        struct vc4_fs_key *fs_key;
332        struct vc4_vs_key *vs_key;
333
334        uint32_t *uniform_data;
335        enum quniform_contents *uniform_contents;
336        uint32_t uniform_array_size;
337        uint32_t num_uniforms;
338        uint32_t num_outputs;
339        uint32_t num_texture_samples;
340        uint32_t output_position_index;
341        uint32_t output_clipvertex_index;
342        uint32_t output_color_index;
343        uint32_t output_point_size_index;
344
345        struct qreg undef;
346        enum qstage stage;
347        uint32_t num_temps;
348        struct simple_node instructions;
349        uint32_t immediates[1024];
350
351        struct simple_node qpu_inst_list;
352        uint64_t *qpu_insts;
353        uint32_t qpu_inst_count;
354        uint32_t qpu_inst_size;
355        uint32_t num_inputs;
356
357        uint32_t program_id;
358        uint32_t variant_id;
359};
360
/* Compile-state lifetime. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);

/* Instruction construction and manipulation.  qir_inst()/qir_inst4() build
 * an instruction; the inline helpers below pass the result to qir_emit(). */
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
struct qinst *qir_inst4(enum qop op, struct qreg dst,
                        struct qreg a,
                        struct qreg b,
                        struct qreg c,
                        struct qreg d);
void qir_remove_instruction(struct qinst *qinst);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
struct qreg qir_get_temp(struct vc4_compile *c);

/* Queries about opcodes, registers and instructions. */
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_reads_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct qinst **defs, struct qreg reg);

/* Debug output. */
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
const char *qir_get_stage_name(enum qstage stage);

/* Optimization and lowering passes.
 * NOTE(review): the bool-returning passes presumably report whether they
 * changed the program -- confirm in the individual vc4_opt_*.c files. */
void qir_optimize(struct vc4_compile *c);
bool qir_opt_algebraic(struct vc4_compile *c);
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);

void qpu_schedule_instructions(struct vc4_compile *c);

void qir_SF(struct vc4_compile *c, struct qreg src);
402
/**
 * Defines the inline helper qir_<name>(c) for a zero-source opcode.
 *
 * The generated function allocates a fresh temporary with qir_get_temp(),
 * emits QOP_<name> writing that temporary with both source slots set to
 * c->undef, and returns the temporary.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef));        \
        return t;                                                        \
}
411
/**
 * Defines the inline helper qir_<name>(c, a) for a one-source opcode.
 *
 * The generated function allocates a fresh temporary with qir_get_temp(),
 * emits QOP_<name> writing that temporary with src0 = a and src1 = c->undef,
 * and returns the temporary.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, a, c->undef));               \
        return t;                                                        \
}
420
/**
 * Defines the inline helper qir_<name>(c, a, b) for a two-source opcode.
 *
 * The generated function allocates a fresh temporary with qir_get_temp(),
 * emits QOP_<name> writing that temporary with src0 = a and src1 = b, and
 * returns the temporary.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, a, b));                      \
        return t;                                                        \
}
429
/**
 * Defines the inline helper qir_<name>(c, a) for a one-source opcode that
 * produces no result register.
 *
 * The generated function emits QOP_<name> with c->undef as the destination,
 * src0 = a and src1 = c->undef, and returns nothing.
 */
#define QIR_NODST_1(name)                                               \
static inline void                                                      \
qir_##name(struct vc4_compile *c, struct qreg a)                        \
{                                                                       \
        qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef));       \
}
436
/**
 * Defines the inline helper qir_<name>(c, a, b) for a two-source opcode that
 * produces no result register.
 *
 * The generated function emits QOP_<name> with c->undef as the destination,
 * src0 = a and src1 = b, and returns nothing.
 */
#define QIR_NODST_2(name)                                               \
static inline void                                                      \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
{                                                                       \
        qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));              \
}
443
444QIR_ALU1(MOV)
445QIR_ALU2(FADD)
446QIR_ALU2(FSUB)
447QIR_ALU2(FMUL)
448QIR_ALU2(MUL24)
449QIR_ALU1(SEL_X_0_ZS)
450QIR_ALU1(SEL_X_0_ZC)
451QIR_ALU1(SEL_X_0_NS)
452QIR_ALU1(SEL_X_0_NC)
453QIR_ALU2(SEL_X_Y_ZS)
454QIR_ALU2(SEL_X_Y_ZC)
455QIR_ALU2(SEL_X_Y_NS)
456QIR_ALU2(SEL_X_Y_NC)
457QIR_ALU2(FMIN)
458QIR_ALU2(FMAX)
459QIR_ALU2(FMINABS)
460QIR_ALU2(FMAXABS)
461QIR_ALU1(FTOI)
462QIR_ALU1(ITOF)
463
464QIR_ALU2(ADD)
465QIR_ALU2(SUB)
466QIR_ALU2(SHL)
467QIR_ALU2(SHR)
468QIR_ALU2(ASR)
469QIR_ALU2(MIN)
470QIR_ALU2(MAX)
471QIR_ALU2(AND)
472QIR_ALU2(OR)
473QIR_ALU2(XOR)
474QIR_ALU1(NOT)
475
476QIR_ALU1(RCP)
477QIR_ALU1(RSQ)
478QIR_ALU1(EXP2)
479QIR_ALU1(LOG2)
480QIR_ALU2(PACK_SCALED)
481QIR_ALU1(PACK_8888_F)
482QIR_ALU2(PACK_8A_F)
483QIR_ALU2(PACK_8B_F)
484QIR_ALU2(PACK_8C_F)
485QIR_ALU2(PACK_8D_F)
486QIR_ALU1(VARY_ADD_C)
487QIR_NODST_2(TEX_S)
488QIR_NODST_2(TEX_T)
489QIR_NODST_2(TEX_R)
490QIR_NODST_2(TEX_B)
491QIR_NODST_2(TEX_DIRECT)
492QIR_ALU0(FRAG_X)
493QIR_ALU0(FRAG_Y)
494QIR_ALU0(FRAG_Z)
495QIR_ALU0(FRAG_W)
496QIR_ALU0(FRAG_REV_FLAG)
497QIR_ALU0(TEX_RESULT)
498QIR_ALU0(TLB_COLOR_READ)
499QIR_NODST_1(TLB_Z_WRITE)
500QIR_NODST_1(TLB_DISCARD_SETUP)
501QIR_NODST_1(TLB_STENCIL_SETUP)
502
503static inline struct qreg
504qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
505{
506        struct qreg t = qir_get_temp(c);
507        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
508        return t;
509}
510
511static inline struct qreg
512qir_SEL_X_0_COND(struct vc4_compile *c, int i)
513{
514        struct qreg t = qir_get_temp(c);
515        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
516        return t;
517}
518
519static inline struct qreg
520qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
521{
522        struct qreg t = qir_get_temp(c);
523        qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
524        return t;
525}
526
527static inline struct qreg
528qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
529{
530        struct qreg t = qir_get_temp(c);
531        qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
532        return t;
533}
534
535static inline struct qreg
536qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
537{
538        struct qreg t = qir_get_temp(c);
539        qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
540        return t;
541}
542
543static inline struct qreg
544qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
545{
546        struct qreg t = qir_get_temp(c);
547        qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
548        return t;
549}
550
551static inline struct qreg
552qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
553{
554        struct qreg t = qir_get_temp(c);
555        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
556        return t;
557}
558
559static inline struct qreg
560qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
561{
562        return qir_EXP2(c, qir_FMUL(c,
563                                    y,
564                                    qir_LOG2(c, x)));
565}
566
567static inline void
568qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
569{
570        static const struct qreg vpm = { QFILE_VPM, 0 };
571        qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
572}
573
574#endif /* VC4_QIR_H */
575