vc4_qir.h revision 3fe4d8e1e39b47c9c5c4bfdd87300abd0c336a7e
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24#ifndef VC4_QIR_H
25#define VC4_QIR_H
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <stdbool.h>
30#include <stdint.h>
31#include <string.h>
32
33#include "util/u_simple_list.h"
34#include "tgsi/tgsi_parse.h"
35
/**
 * Register file selector stored in struct qreg::file.
 *
 * The enumerators keep their implicit values (QFILE_NULL == 0), so a
 * zero-initialized struct qreg refers to QFILE_NULL.
 */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
};
42
43struct qreg {
44        enum qfile file;
45        uint32_t index;
46};
47
/**
 * QIR opcodes.
 *
 * Several groups below are addressed as "base + i" by the inline helpers in
 * this header (QOP_R4_UNPACK_A + i, QOP_UNPACK_8A + i), so the members of
 * those groups must stay contiguous and in order.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        /* Sets the flag register according to src. */
        QOP_SF,

        /* Note: Orderings of these compares must be the same as in
         * qpu_defines.h.  Selects src[0] if the flag condition named by the
         * opcode suffix holds, otherwise 0.
         *
         * NOTE(review): the suffixes presumably mean Z/N flag Set/Clear
         * (ZS, ZC, NS, NC) -- confirm against qpu_defines.h.
         */
        QOP_SEL_X_0_ZS,
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
        /* Selects src[0] if the flag condition named by the opcode suffix
         * holds, otherwise src[1]. */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_PACK_SCALED,
        QOP_PACK_COLORS,
        QOP_VPM_WRITE,
        QOP_VPM_READ,
        QOP_TLB_DISCARD_SETUP,
        QOP_TLB_STENCIL_SETUP,
        QOP_TLB_Z_WRITE,
        QOP_TLB_COLOR_WRITE,
        QOP_TLB_COLOR_READ,
        QOP_VARY_ADD_C,

        QOP_FRAG_X,
        QOP_FRAG_Y,
        QOP_FRAG_Z,
        QOP_FRAG_W,
        QOP_FRAG_REV_FLAG,

        /* Indexed as QOP_UNPACK_8A + i by qir_UNPACK_8(). */
        QOP_UNPACK_8A,
        QOP_UNPACK_8B,
        QOP_UNPACK_8C,
        QOP_UNPACK_8D,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,
        /* Indexed as QOP_R4_UNPACK_A + i by qir_R4_UNPACK(). */
        QOP_R4_UNPACK_A,
        QOP_R4_UNPACK_B,
        QOP_R4_UNPACK_C,
        QOP_R4_UNPACK_D
};
145
/**
 * Doubly-linked list link, embedded as the first member ("link") of list
 * elements such as struct qinst and struct queued_qpu_inst.
 */
struct simple_node {
        struct simple_node *next;
        struct simple_node *prev;
};
150
151struct queued_qpu_inst {
152        struct simple_node link;
153        uint64_t inst;
154};
155
156struct qinst {
157        struct simple_node link;
158
159        enum qop op;
160        struct qreg dst;
161        struct qreg *src;
162};
163
/** Shader stage being compiled (see vc4_compile::stage). */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};
173
/**
 * Describes what each entry of the compiled program's uniform stream
 * contains (see vc4_compile::uniform_contents).
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR,
        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
};
237
/**
 * Identifies one varying by semantic, semantic index, and swizzle.
 *
 * Used for the entries of vc4_compile::input_semantics and
 * vc4_compile::output_semantics.
 */
struct vc4_varying_semantic {
        uint8_t semantic;
        uint8_t index;
        uint8_t swizzle;
};
243
/**
 * One contiguous range of the gallium uniforms that may be uploaded into a
 * UBO for indirect uniform access.
 */
struct vc4_compiler_ubo_range {
        /**
         * offset in bytes from the start of the ubo where this range is
         * uploaded.
         *
         * Only set once used is set.
         */
        uint32_t dst_offset;

        /**
         * offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** size in bytes of this ubo range */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};
268
269struct vc4_compile {
270        struct vc4_context *vc4;
271        struct tgsi_parse_context parser;
272        struct qreg *temps;
273        /**
274         * Inputs to the shader, arranged by TGSI declaration order.
275         *
276         * Not all fragment shader QFILE_VARY reads are present in this array.
277         */
278        struct qreg *inputs;
279        struct qreg *outputs;
280        struct qreg *consts;
281        struct qreg addr[4]; /* TGSI ARL destination. */
282        uint32_t temps_array_size;
283        uint32_t inputs_array_size;
284        uint32_t outputs_array_size;
285        uint32_t uniforms_array_size;
286        uint32_t consts_array_size;
287        uint32_t num_consts;
288
289        struct vc4_compiler_ubo_range *ubo_ranges;
290        uint32_t ubo_ranges_array_size;
291        uint32_t num_ubo_ranges;
292        uint32_t next_ubo_dst_offset;
293
294        struct qreg line_x, point_x, point_y;
295        struct qreg discard;
296
297        /**
298         * Array of the TGSI semantics of all FS QFILE_VARY reads.
299         *
300         * This includes those that aren't part of the VPM varyings, like
301         * point/line coordinates.
302         */
303        struct vc4_varying_semantic *input_semantics;
304        uint32_t num_input_semantics;
305        uint32_t input_semantics_array_size;
306
307        /**
308         * An entry per outputs[] in the VS indicating what the semantic of
309         * the output is.  Used to emit from the VS in the order that the FS
310         * needs.
311         */
312        struct vc4_varying_semantic *output_semantics;
313
314        struct pipe_shader_state *shader_state;
315        struct vc4_key *key;
316        struct vc4_fs_key *fs_key;
317        struct vc4_vs_key *vs_key;
318
319        uint32_t *uniform_data;
320        enum quniform_contents *uniform_contents;
321        uint32_t uniform_array_size;
322        uint32_t num_uniforms;
323        uint32_t num_outputs;
324        uint32_t num_texture_samples;
325        uint32_t output_position_index;
326        uint32_t output_clipvertex_index;
327        uint32_t output_color_index;
328        uint32_t output_point_size_index;
329
330        struct qreg undef;
331        enum qstage stage;
332        uint32_t num_temps;
333        struct simple_node instructions;
334        uint32_t immediates[1024];
335
336        struct simple_node qpu_inst_list;
337        uint64_t *qpu_insts;
338        uint32_t qpu_inst_count;
339        uint32_t qpu_inst_size;
340        uint32_t num_inputs;
341
342        uint32_t program_id;
343        uint32_t variant_id;
344};
345
/* Compile context lifetime. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);

/* Instruction construction, emission and queries. */
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
struct qinst *qir_inst4(enum qop op, struct qreg dst,
                        struct qreg a,
                        struct qreg b,
                        struct qreg c,
                        struct qreg d);
void qir_remove_instruction(struct qinst *qinst);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
struct qreg qir_get_temp(struct vc4_compile *c);
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_reads_r4(struct qinst *inst);

/* Debug dumping. */
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
const char *qir_get_stage_name(enum qstage stage);

/* Optimization passes. */
void qir_optimize(struct vc4_compile *c);
bool qir_opt_algebraic(struct vc4_compile *c);
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);

void qpu_schedule_instructions(struct vc4_compile *c);
377
/**
 * Defines qir_<name>(c): allocates a new temp, emits QOP_<name> writing that
 * temp with both source slots set to c->undef, and returns the temp.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef));        \
        return t;                                                        \
}

/**
 * Defines qir_<name>(c, a): allocates a new temp, emits QOP_<name> writing
 * that temp with source a (second source slot c->undef), and returns the
 * temp.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, a, c->undef));               \
        return t;                                                        \
}

/**
 * Defines qir_<name>(c, a, b): allocates a new temp, emits QOP_<name>
 * writing that temp with sources a and b, and returns the temp.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qreg t = qir_get_temp(c);                                 \
        qir_emit(c, qir_inst(QOP_##name, t, a, b));                      \
        return t;                                                        \
}

/**
 * Defines qir_<name>(c, a): emits QOP_<name> with source a and c->undef as
 * the destination.  Returns nothing.
 */
#define QIR_NODST_1(name)                                                \
static inline void                                                       \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef));        \
}

/**
 * Defines qir_<name>(c, a, b): emits QOP_<name> with sources a and b and
 * c->undef as the destination.  Returns nothing.
 */
#define QIR_NODST_2(name)                                                \
static inline void                                                       \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));               \
}
418
419QIR_ALU1(MOV)
420QIR_ALU2(FADD)
421QIR_ALU2(FSUB)
422QIR_ALU2(FMUL)
423QIR_ALU2(MUL24)
424QIR_NODST_1(SF)
425QIR_ALU1(SEL_X_0_ZS)
426QIR_ALU1(SEL_X_0_ZC)
427QIR_ALU1(SEL_X_0_NS)
428QIR_ALU1(SEL_X_0_NC)
429QIR_ALU2(SEL_X_Y_ZS)
430QIR_ALU2(SEL_X_Y_ZC)
431QIR_ALU2(SEL_X_Y_NS)
432QIR_ALU2(SEL_X_Y_NC)
433QIR_ALU2(FMIN)
434QIR_ALU2(FMAX)
435QIR_ALU2(FMINABS)
436QIR_ALU2(FMAXABS)
437QIR_ALU1(FTOI)
438QIR_ALU1(ITOF)
439
440QIR_ALU2(ADD)
441QIR_ALU2(SUB)
442QIR_ALU2(SHL)
443QIR_ALU2(SHR)
444QIR_ALU2(ASR)
445QIR_ALU2(MIN)
446QIR_ALU2(MAX)
447QIR_ALU2(AND)
448QIR_ALU2(OR)
449QIR_ALU2(XOR)
450QIR_ALU1(NOT)
451
452QIR_ALU1(RCP)
453QIR_ALU1(RSQ)
454QIR_ALU1(EXP2)
455QIR_ALU1(LOG2)
456QIR_ALU2(PACK_SCALED)
457QIR_ALU1(VARY_ADD_C)
458QIR_NODST_1(VPM_WRITE)
459QIR_NODST_2(TEX_S)
460QIR_NODST_2(TEX_T)
461QIR_NODST_2(TEX_R)
462QIR_NODST_2(TEX_B)
463QIR_NODST_2(TEX_DIRECT)
464QIR_ALU0(FRAG_X)
465QIR_ALU0(FRAG_Y)
466QIR_ALU0(FRAG_Z)
467QIR_ALU0(FRAG_W)
468QIR_ALU0(FRAG_REV_FLAG)
469QIR_ALU0(TEX_RESULT)
470QIR_ALU0(TLB_COLOR_READ)
471QIR_NODST_1(TLB_Z_WRITE)
472QIR_NODST_1(TLB_DISCARD_SETUP)
473QIR_NODST_1(TLB_STENCIL_SETUP)
474
475static inline struct qreg
476qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
477{
478        struct qreg t = qir_get_temp(c);
479        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
480        return t;
481}
482
483static inline struct qreg
484qir_SEL_X_0_COND(struct vc4_compile *c, int i)
485{
486        struct qreg t = qir_get_temp(c);
487        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
488        return t;
489}
490
491static inline struct qreg
492qir_UNPACK_8(struct vc4_compile *c, struct qreg src, int i)
493{
494        struct qreg t = qir_get_temp(c);
495        qir_emit(c, qir_inst(QOP_UNPACK_8A + i, t, src, c->undef));
496        return t;
497}
498
499static inline struct qreg
500qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
501{
502        return qir_EXP2(c, qir_FMUL(c,
503                                    y,
504                                    qir_LOG2(c, x)));
505}
506
507#endif /* VC4_QIR_H */
508