vc4_qir.h revision cc8fb2904673588d31b660dbfaf692615b5202dd
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24#ifndef VC4_QIR_H
25#define VC4_QIR_H
26
27#include <assert.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <stdbool.h>
31#include <stdint.h>
32#include <string.h>
33
34#include "util/macros.h"
35#include "glsl/nir/nir.h"
36#include "util/list.h"
37#include "util/u_math.h"
38
39#include "vc4_screen.h"
40#include "pipe/p_state.h"
41
/**
 * Register file selector: says which namespace the index of a struct qreg
 * lives in.
 *
 * Enumerator order is part of the interface; append new files at the end.
 */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};
55
56struct qreg {
57        enum qfile file;
58        uint32_t index;
59};
60
/**
 * QIR opcodes.
 *
 * Several runs of enumerators are indexed arithmetically by the helpers in
 * this header (QOP_PACK_8A_F + chan, QOP_UNPACK_8A_F + i, QOP_UNPACK_8A_I + i,
 * QOP_UNPACK_16A_F + i, QOP_UNPACK_16A_I + i), so the A/B/C/D members of each
 * of those groups must stay contiguous and in order.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        /* Note: Orderings of these compares must be the same as in
         * qpu_defines.h.
         *
         * Selects src[0] if the condition named by the suffix holds,
         * otherwise 0.  The suffix is presumably the flag tested (Z or N)
         * followed by S(et) or C(lear), as in the QPU condition codes --
         * confirm against qpu_defines.h. */
        QOP_SEL_X_0_ZS,
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
        /* Selects src[0] if the condition named by the suffix holds,
         * otherwise src[1]. */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_PACK_SCALED,
        QOP_PACK_8888_F,
        /* Must stay contiguous: indexed as QOP_PACK_8A_F + chan. */
        QOP_PACK_8A_F,
        QOP_PACK_8B_F,
        QOP_PACK_8C_F,
        QOP_PACK_8D_F,
        QOP_TLB_DISCARD_SETUP,
        QOP_TLB_STENCIL_SETUP,
        QOP_TLB_Z_WRITE,
        QOP_TLB_COLOR_WRITE,
        QOP_TLB_COLOR_READ,
        QOP_VARY_ADD_C,

        QOP_FRAG_X,
        QOP_FRAG_Y,
        QOP_FRAG_Z,
        QOP_FRAG_W,
        QOP_FRAG_REV_FLAG,

        /* Must stay contiguous: indexed as QOP_UNPACK_8A_F + i and
         * QOP_UNPACK_16A_F + i. */
        QOP_UNPACK_8A_F,
        QOP_UNPACK_8B_F,
        QOP_UNPACK_8C_F,
        QOP_UNPACK_8D_F,
        QOP_UNPACK_16A_F,
        QOP_UNPACK_16B_F,

        /* Must stay contiguous: indexed as QOP_UNPACK_8A_I + i and
         * QOP_UNPACK_16A_I + i. */
        QOP_UNPACK_8A_I,
        QOP_UNPACK_8B_I,
        QOP_UNPACK_8C_I,
        QOP_UNPACK_8D_I,
        QOP_UNPACK_16A_I,
        QOP_UNPACK_16B_I,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,
};
162
163struct queued_qpu_inst {
164        struct list_head link;
165        uint64_t inst;
166};
167
168struct qinst {
169        struct list_head link;
170
171        enum qop op;
172        struct qreg dst;
173        struct qreg *src;
174        bool sf;
175};
176
/**
 * Shader stage that a vc4_compile is building code for.
 */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        /** Vertex shader. */
        QSTAGE_VERT,
        /** Fragment shader. */
        QSTAGE_FRAG,
};
186
/**
 * Describes what a uniform slot holds.
 *
 * Passed as the contents argument of qir_uniform() and stored per uniform in
 * vc4_compile::uniform_contents.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR,
        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
};
250
/**
 * Identifies one scalar varying by semantic, semantic index and swizzle.
 *
 * Used for the entries of vc4_compile::input_semantics and
 * vc4_compile::output_semantics.
 */
struct vc4_varying_semantic {
        /** Semantic name (a TGSI semantic, per the vc4_compile comments). */
        uint8_t semantic;
        /** Index distinguishing several varyings of the same semantic. */
        uint8_t index;
        /** NOTE(review): presumably the component (x/y/z/w) -- confirm. */
        uint8_t swizzle;
};
256
/**
 * Describes one range of uniform storage: where it comes from in the
 * gallium uniforms and, once used, where it is uploaded in the UBO.
 */
struct vc4_compiler_ubo_range {
        /**
         * Offset in bytes from the start of the UBO where this range is
         * uploaded.
         *
         * Only set once used is set.
         */
        uint32_t dst_offset;

        /**
         * Offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** Size in bytes of this UBO range. */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};
281
282struct vc4_key {
283        struct vc4_uncompiled_shader *shader_state;
284        struct {
285                enum pipe_format format;
286                unsigned compare_mode:1;
287                unsigned compare_func:3;
288                unsigned wrap_s:3;
289                unsigned wrap_t:3;
290                uint8_t swizzle[4];
291        } tex[VC4_MAX_TEXTURE_SAMPLERS];
292        uint8_t ucp_enables;
293};
294
295struct vc4_fs_key {
296        struct vc4_key base;
297        enum pipe_format color_format;
298        bool depth_enabled;
299        bool stencil_enabled;
300        bool stencil_twoside;
301        bool stencil_full_writemasks;
302        bool is_points;
303        bool is_lines;
304        bool alpha_test;
305        bool point_coord_upper_left;
306        bool light_twoside;
307        uint8_t alpha_test_func;
308        uint8_t logicop_func;
309        uint32_t point_sprite_mask;
310
311        struct pipe_rt_blend_state blend;
312};
313
314struct vc4_vs_key {
315        struct vc4_key base;
316
317        /**
318         * This is a proxy for the array of FS input semantics, which is
319         * larger than we would want to put in the key.
320         */
321        uint64_t compiled_fs_id;
322
323        enum pipe_format attr_formats[8];
324        bool is_coord;
325        bool per_vertex_point_size;
326};
327
328struct vc4_compile {
329        struct vc4_context *vc4;
330        nir_shader *s;
331        nir_function_impl *impl;
332        struct exec_list *cf_node_list;
333
334        /**
335         * Mapping from nir_register * or nir_ssa_def * to array of struct
336         * qreg for the values.
337         */
338        struct hash_table *def_ht;
339
340        /* For each temp, the instruction generating its value. */
341        struct qinst **defs;
342        uint32_t defs_array_size;
343
344        /**
345         * Inputs to the shader, arranged by TGSI declaration order.
346         *
347         * Not all fragment shader QFILE_VARY reads are present in this array.
348         */
349        struct qreg *inputs;
350        struct qreg *outputs;
351        uint32_t inputs_array_size;
352        uint32_t outputs_array_size;
353        uint32_t uniforms_array_size;
354
355        struct vc4_compiler_ubo_range *ubo_ranges;
356        uint32_t ubo_ranges_array_size;
357        /** Number of uniform areas declared in ubo_ranges. */
358        uint32_t num_uniform_ranges;
359        /** Number of uniform areas used for indirect addressed loads. */
360        uint32_t num_ubo_ranges;
361        uint32_t next_ubo_dst_offset;
362
363        struct qreg line_x, point_x, point_y;
364        struct qreg discard;
365
366        uint8_t vattr_sizes[8];
367
368        /**
369         * Array of the TGSI semantics of all FS QFILE_VARY reads.
370         *
371         * This includes those that aren't part of the VPM varyings, like
372         * point/line coordinates.
373         */
374        struct vc4_varying_semantic *input_semantics;
375        uint32_t num_input_semantics;
376        uint32_t input_semantics_array_size;
377
378        /**
379         * An entry per outputs[] in the VS indicating what the semantic of
380         * the output is.  Used to emit from the VS in the order that the FS
381         * needs.
382         */
383        struct vc4_varying_semantic *output_semantics;
384
385        struct pipe_shader_state *shader_state;
386        struct vc4_key *key;
387        struct vc4_fs_key *fs_key;
388        struct vc4_vs_key *vs_key;
389
390        uint32_t *uniform_data;
391        enum quniform_contents *uniform_contents;
392        uint32_t uniform_array_size;
393        uint32_t num_uniforms;
394        uint32_t num_outputs;
395        uint32_t num_texture_samples;
396        uint32_t output_position_index;
397        uint32_t output_clipvertex_index;
398        uint32_t output_color_index;
399        uint32_t output_point_size_index;
400
401        struct qreg undef;
402        enum qstage stage;
403        uint32_t num_temps;
404        struct list_head instructions;
405        uint32_t immediates[1024];
406
407        struct list_head qpu_inst_list;
408        uint64_t *qpu_insts;
409        uint32_t qpu_inst_count;
410        uint32_t qpu_inst_size;
411        uint32_t num_inputs;
412
413        uint32_t program_id;
414        uint32_t variant_id;
415};
416
/* Compile-context lifetime. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);

/* Instruction construction, emission and removal. */
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
struct qinst *qir_inst4(enum qop op, struct qreg dst,
                        struct qreg a,
                        struct qreg b,
                        struct qreg c,
                        struct qreg d);
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
struct qreg qir_uniform(struct vc4_compile *c,
                        enum quniform_contents contents,
                        uint32_t data);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
struct qreg qir_get_temp(struct vc4_compile *c);

/* Queries about opcodes, registers and instructions. */
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);

/* Debug output. */
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
const char *qir_get_stage_name(enum qstage stage);

/*
 * Optimization and lowering passes.
 *
 * NOTE(review): the bool returned by the qir_opt_*() passes presumably
 * reports whether the pass changed the program -- confirm against
 * qir_optimize().
 */
void qir_optimize(struct vc4_compile *c);
bool qir_opt_algebraic(struct vc4_compile *c);
bool qir_opt_constant_folding(struct vc4_compile *c);
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
void vc4_nir_lower_io(struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);

/* QPU instruction scheduling. */
void qpu_schedule_instructions(struct vc4_compile *c);

void qir_SF(struct vc4_compile *c, struct qreg src);
462
463static inline struct qreg
464qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
465{
466        return qir_uniform(c, QUNIFORM_CONSTANT, ui);
467}
468
469static inline struct qreg
470qir_uniform_f(struct vc4_compile *c, float f)
471{
472        return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
473}
474
/**
 * Defines qir_<name>(c): allocates a new temporary, emits a QOP_<name>
 * instruction writing it with both source slots set to c->undef, and
 * returns the temporary.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg result = qir_get_temp(c);                            \
        struct qinst *inst = qir_inst(QOP_##name, result,                \
                                      c->undef, c->undef);               \
        qir_emit(c, inst);                                               \
        return result;                                                   \
}
483
/**
 * Defines qir_<name>(c, a): allocates a new temporary, emits a QOP_<name>
 * instruction writing it with a as the first source and c->undef in the
 * second slot, and returns the temporary.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qreg result = qir_get_temp(c);                            \
        struct qinst *inst = qir_inst(QOP_##name, result,                \
                                      a, c->undef);                      \
        qir_emit(c, inst);                                               \
        return result;                                                   \
}
492
/**
 * Defines qir_<name>(c, a, b): allocates a new temporary, emits a
 * QOP_<name> instruction writing it with sources a and b, and returns the
 * temporary.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qreg result = qir_get_temp(c);                            \
        struct qinst *inst = qir_inst(QOP_##name, result, a, b);         \
        qir_emit(c, inst);                                               \
        return result;                                                   \
}
501
/**
 * Defines qir_<name>(c, a): emits a QOP_<name> instruction with a as the
 * first source, c->undef as the destination and second source, and returns
 * nothing.
 */
#define QIR_NODST_1(name)                                                \
static inline void                                                       \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        struct qinst *inst = qir_inst(QOP_##name, c->undef,              \
                                      a, c->undef);                      \
        qir_emit(c, inst);                                               \
}
508
/**
 * Defines qir_<name>(c, a, b): emits a QOP_<name> instruction with sources
 * a and b and c->undef as the destination, and returns nothing.
 */
#define QIR_NODST_2(name)                                                \
static inline void                                                       \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        struct qinst *inst = qir_inst(QOP_##name, c->undef, a, b);       \
        qir_emit(c, inst);                                               \
}
515
516QIR_ALU1(MOV)
517QIR_ALU2(FADD)
518QIR_ALU2(FSUB)
519QIR_ALU2(FMUL)
520QIR_ALU2(MUL24)
521QIR_ALU1(SEL_X_0_ZS)
522QIR_ALU1(SEL_X_0_ZC)
523QIR_ALU1(SEL_X_0_NS)
524QIR_ALU1(SEL_X_0_NC)
525QIR_ALU2(SEL_X_Y_ZS)
526QIR_ALU2(SEL_X_Y_ZC)
527QIR_ALU2(SEL_X_Y_NS)
528QIR_ALU2(SEL_X_Y_NC)
529QIR_ALU2(FMIN)
530QIR_ALU2(FMAX)
531QIR_ALU2(FMINABS)
532QIR_ALU2(FMAXABS)
533QIR_ALU1(FTOI)
534QIR_ALU1(ITOF)
535
536QIR_ALU2(ADD)
537QIR_ALU2(SUB)
538QIR_ALU2(SHL)
539QIR_ALU2(SHR)
540QIR_ALU2(ASR)
541QIR_ALU2(MIN)
542QIR_ALU2(MAX)
543QIR_ALU2(AND)
544QIR_ALU2(OR)
545QIR_ALU2(XOR)
546QIR_ALU1(NOT)
547
548QIR_ALU1(RCP)
549QIR_ALU1(RSQ)
550QIR_ALU1(EXP2)
551QIR_ALU1(LOG2)
552QIR_ALU2(PACK_SCALED)
553QIR_ALU1(PACK_8888_F)
554QIR_ALU2(PACK_8A_F)
555QIR_ALU2(PACK_8B_F)
556QIR_ALU2(PACK_8C_F)
557QIR_ALU2(PACK_8D_F)
558QIR_ALU1(VARY_ADD_C)
559QIR_NODST_2(TEX_S)
560QIR_NODST_2(TEX_T)
561QIR_NODST_2(TEX_R)
562QIR_NODST_2(TEX_B)
563QIR_NODST_2(TEX_DIRECT)
564QIR_ALU0(FRAG_X)
565QIR_ALU0(FRAG_Y)
566QIR_ALU0(FRAG_Z)
567QIR_ALU0(FRAG_W)
568QIR_ALU0(FRAG_REV_FLAG)
569QIR_ALU0(TEX_RESULT)
570QIR_ALU0(TLB_COLOR_READ)
571QIR_NODST_1(TLB_COLOR_WRITE)
572QIR_NODST_1(TLB_Z_WRITE)
573QIR_NODST_1(TLB_DISCARD_SETUP)
574QIR_NODST_1(TLB_STENCIL_SETUP)
575
576static inline struct qreg
577qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
578{
579        struct qreg t = qir_get_temp(c);
580        qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
581        return t;
582}
583
584static inline struct qreg
585qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
586{
587        struct qreg t = qir_get_temp(c);
588        qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
589        return t;
590}
591
592static inline struct qreg
593qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
594{
595        struct qreg t = qir_get_temp(c);
596        qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
597        return t;
598}
599
600static inline struct qreg
601qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
602{
603        struct qreg t = qir_get_temp(c);
604        qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
605        return t;
606}
607
608static inline struct qreg
609qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
610{
611        struct qreg t = qir_get_temp(c);
612        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
613        return t;
614}
615
616static inline struct qreg
617qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
618{
619        return qir_EXP2(c, qir_FMUL(c,
620                                    y,
621                                    qir_LOG2(c, x)));
622}
623
624static inline void
625qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
626{
627        static const struct qreg vpm = { QFILE_VPM, 0 };
628        qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
629}
630
631#endif /* VC4_QIR_H */
632