vc4_qir.h revision 8ce65261789f085e657e6a487db93d38ee6bea63
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC4_QIR_H
25#define VC4_QIR_H
26
27#include <assert.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <stdbool.h>
31#include <stdint.h>
32#include <string.h>
33
34#include "util/macros.h"
35#include "compiler/nir/nir.h"
36#include "util/list.h"
37#include "util/u_math.h"
38
39#include "vc4_screen.h"
40#include "vc4_qpu_defines.h"
41#include "vc4_qpu.h"
42#include "kernel/vc4_packet.h"
43#include "pipe/p_state.h"
44
45struct nir_builder;
46
/** Register files that the `file` member of a struct qreg can name. */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        QFILE_VPM,
        QFILE_TLB_COLOR_WRITE,
        QFILE_TLB_COLOR_WRITE_MS,
        QFILE_TLB_Z_WRITE,
        QFILE_TLB_STENCIL_SETUP,

        /* Payload registers that aren't in the physical register file, so we
         * can just use the corresponding qpu_reg at qpu_emit time.
         */
        QFILE_FRAG_X,
        QFILE_FRAG_Y,
        QFILE_FRAG_REV_FLAG,
        QFILE_QPU_ELEMENT,

        /**
         * Stores an immediate value in the index field that will be used
         * directly by qpu_load_imm().
         */
        QFILE_LOAD_IMM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};
78
79struct qreg {
80        enum qfile file;
81        uint32_t index;
82        int pack;
83};
84
85static inline struct qreg qir_reg(enum qfile file, uint32_t index)
86{
87        return (struct qreg){file, index};
88}
89
/** QIR instruction opcodes, stored in the `op` member of struct qinst. */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FMOV,
        QOP_MMOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_V8MULD,
        QOP_V8MIN,
        QOP_V8MAX,
        QOP_V8ADDS,
        QOP_V8SUBS,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_TLB_COLOR_READ,
        QOP_MS_MASK,
        QOP_VARY_ADD_C,

        QOP_FRAG_Z,
        QOP_FRAG_W,

        /** Texture x coordinate parameter write */
        QOP_TEX_S,
        /** Texture y coordinate parameter write */
        QOP_TEX_T,
        /** Texture border color parameter or cube map z coordinate write */
        QOP_TEX_R,
        /** Texture LOD bias parameter write */
        QOP_TEX_B,

        /**
         * Texture-unit 4-byte read with address provided direct in S
         * coordinate.
         *
         * The first operand is the offset from the start of the UBO, and the
         * second is the uniform that has the UBO's base pointer.
         */
        QOP_TEX_DIRECT,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,

        /* 32-bit immediate loaded to each SIMD channel */
        QOP_LOAD_IMM,

        /* 32-bit immediate divided into 16 2-bit unsigned int values and
         * loaded to each corresponding SIMD channel.
         */
        QOP_LOAD_IMM_U2,
        /* 32-bit immediate divided into 16 2-bit signed int values and
         * loaded to each corresponding SIMD channel.
         */
        QOP_LOAD_IMM_I2,

        QOP_ROT_MUL,

        /* Jumps to block->successor[0] if the qinst->cond (as a
         * QPU_COND_BRANCH_*) passes, or block->successor[1] if not.  Note
         * that block->successor[1] may be unset if the condition is ALWAYS.
         */
        QOP_BRANCH,

        /* Emits an ADD from src[0] to src[1], where src[0] must be a
         * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS,
         * required by the kernel as part of its branch validation.
         */
        QOP_UNIFORMS_RESET,
};
185
186struct queued_qpu_inst {
187        struct list_head link;
188        uint64_t inst;
189};
190
191struct qinst {
192        struct list_head link;
193
194        enum qop op;
195        struct qreg dst;
196        struct qreg *src;
197        bool sf;
198        uint8_t cond;
199};
200
/** Shader stage being compiled (stored in vc4_compile::stage). */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};
210
/**
 * Describes what each entry of a compiled shader's uniform stream holds
 * (see vc4_compile::uniform_contents).
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_TEXTURE_FIRST_LEVEL,

        QUNIFORM_TEXTURE_MSAA_ADDR,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR_X,
        QUNIFORM_BLEND_CONST_COLOR_Y,
        QUNIFORM_BLEND_CONST_COLOR_Z,
        QUNIFORM_BLEND_CONST_COLOR_W,
        QUNIFORM_BLEND_CONST_COLOR_RGBA,
        QUNIFORM_BLEND_CONST_COLOR_AAAA,

        QUNIFORM_STENCIL,

        QUNIFORM_ALPHA_REF,
        QUNIFORM_SAMPLE_MASK,

        /* Placeholder uniform that will be updated by the kernel when used by
         * an instruction writing to QPU_W_UNIFORMS_ADDRESS.
         */
        QUNIFORM_UNIFORMS_ADDRESS,
};
290
/**
 * Identifies one varying component by slot number and swizzle.
 *
 * Used for the input_slots and output_slots arrays of struct vc4_compile,
 * whose comments describe the slot values as VARYING_SLOT_*.
 */
struct vc4_varying_slot {
        uint8_t slot;
        uint8_t swizzle;
};
295
/** A range of uniform data that may be uploaded into the shader's UBO. */
struct vc4_compiler_ubo_range {
        /**
         * offset in bytes from the start of the ubo where this range is
         * uploaded.
         *
         * Only set once used is set.
         */
        uint32_t dst_offset;

        /**
         * offset in bytes from the start of the gallium uniforms where the
         * data comes from.
         */
        uint32_t src_offset;

        /** size in bytes of this ubo range */
        uint32_t size;

        /**
         * Set if this range is used by the shader for indirect uniforms
         * access.
         */
        bool used;
};
320
321struct vc4_key {
322        struct vc4_uncompiled_shader *shader_state;
323        struct {
324                enum pipe_format format;
325                uint8_t swizzle[4];
326                union {
327                        struct {
328                                unsigned compare_mode:1;
329                                unsigned compare_func:3;
330                                unsigned wrap_s:3;
331                                unsigned wrap_t:3;
332                                bool force_first_level:1;
333                        };
334                        struct {
335                                uint16_t msaa_width, msaa_height;
336                        };
337                };
338        } tex[VC4_MAX_TEXTURE_SAMPLERS];
339        uint8_t ucp_enables;
340};
341
342struct vc4_fs_key {
343        struct vc4_key base;
344        enum pipe_format color_format;
345        bool depth_enabled;
346        bool stencil_enabled;
347        bool stencil_twoside;
348        bool stencil_full_writemasks;
349        bool is_points;
350        bool is_lines;
351        bool alpha_test;
352        bool point_coord_upper_left;
353        bool light_twoside;
354        bool msaa;
355        bool sample_coverage;
356        bool sample_alpha_to_coverage;
357        bool sample_alpha_to_one;
358        uint8_t alpha_test_func;
359        uint8_t logicop_func;
360        uint32_t point_sprite_mask;
361
362        struct pipe_rt_blend_state blend;
363};
364
365struct vc4_vs_key {
366        struct vc4_key base;
367
368        const struct vc4_fs_inputs *fs_inputs;
369        enum pipe_format attr_formats[8];
370        bool is_coord;
371        bool per_vertex_point_size;
372        bool clamp_color;
373};
374
375/** A basic block of QIR intructions. */
376struct qblock {
377        struct list_head link;
378
379        struct list_head instructions;
380        struct list_head qpu_inst_list;
381
382        struct set *predecessors;
383        struct qblock *successors[2];
384
385        int index;
386
387        /* Instruction IPs for the first and last instruction of the block.
388         * Set by vc4_qpu_schedule.c.
389         */
390        uint32_t start_qpu_ip;
391        uint32_t end_qpu_ip;
392
393        /* Instruction IP for the branch instruction of the block.  Set by
394         * vc4_qpu_schedule.c.
395         */
396        uint32_t branch_qpu_ip;
397
398        /** @{ used by vc4_qir_live_variables.c */
399        BITSET_WORD *def;
400        BITSET_WORD *use;
401        BITSET_WORD *live_in;
402        BITSET_WORD *live_out;
403        int start_ip, end_ip;
404        /** @} */
405};
406
407struct vc4_compile {
408        struct vc4_context *vc4;
409        nir_shader *s;
410        nir_function_impl *impl;
411        struct exec_list *cf_node_list;
412
413        /**
414         * Mapping from nir_register * or nir_ssa_def * to array of struct
415         * qreg for the values.
416         */
417        struct hash_table *def_ht;
418
419        /* For each temp, the instruction generating its value. */
420        struct qinst **defs;
421        uint32_t defs_array_size;
422
423        /**
424         * Inputs to the shader, arranged by TGSI declaration order.
425         *
426         * Not all fragment shader QFILE_VARY reads are present in this array.
427         */
428        struct qreg *inputs;
429        struct qreg *outputs;
430        bool msaa_per_sample_output;
431        struct qreg color_reads[VC4_MAX_SAMPLES];
432        struct qreg sample_colors[VC4_MAX_SAMPLES];
433        uint32_t inputs_array_size;
434        uint32_t outputs_array_size;
435        uint32_t uniforms_array_size;
436
437        struct vc4_compiler_ubo_range *ubo_ranges;
438        uint32_t ubo_ranges_array_size;
439        /** Number of uniform areas declared in ubo_ranges. */
440        uint32_t num_uniform_ranges;
441        /** Number of uniform areas used for indirect addressed loads. */
442        uint32_t num_ubo_ranges;
443        uint32_t next_ubo_dst_offset;
444
445        /* State for whether we're executing on each channel currently.  0 if
446         * yes, otherwise a block number + 1 that the channel jumped to.
447         */
448        struct qreg execute;
449
450        struct qreg line_x, point_x, point_y;
451        struct qreg discard;
452        struct qreg payload_FRAG_Z;
453        struct qreg payload_FRAG_W;
454
455        uint8_t vattr_sizes[8];
456
457        /**
458         * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
459         *
460         * This includes those that aren't part of the VPM varyings, like
461         * point/line coordinates.
462         */
463        struct vc4_varying_slot *input_slots;
464        uint32_t num_input_slots;
465        uint32_t input_slots_array_size;
466
467        /**
468         * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
469         * of the output is.  Used to emit from the VS in the order that the
470         * FS needs.
471         */
472        struct vc4_varying_slot *output_slots;
473
474        struct pipe_shader_state *shader_state;
475        struct vc4_key *key;
476        struct vc4_fs_key *fs_key;
477        struct vc4_vs_key *vs_key;
478
479        /* Live ranges of temps. */
480        int *temp_start, *temp_end;
481
482        uint32_t *uniform_data;
483        enum quniform_contents *uniform_contents;
484        uint32_t uniform_array_size;
485        uint32_t num_uniforms;
486        uint32_t num_outputs;
487        uint32_t num_texture_samples;
488        uint32_t output_position_index;
489        uint32_t output_color_index;
490        uint32_t output_point_size_index;
491        uint32_t output_sample_mask_index;
492
493        struct qreg undef;
494        enum qstage stage;
495        uint32_t num_temps;
496
497        struct list_head blocks;
498        int next_block_index;
499        struct qblock *cur_block;
500        struct qblock *loop_cont_block;
501        struct qblock *loop_break_block;
502
503        struct list_head qpu_inst_list;
504
505        uint64_t *qpu_insts;
506        uint32_t qpu_inst_count;
507        uint32_t qpu_inst_size;
508        uint32_t num_inputs;
509
510        /**
511         * Number of inputs from num_inputs remaining to be queued to the read
512         * FIFO in the VS/CS.
513         */
514        uint32_t num_inputs_remaining;
515
516        /* Number of inputs currently in the read FIFO for the VS/CS */
517        uint32_t num_inputs_in_fifo;
518
519        /** Next offset in the VPM to read from in the VS/CS */
520        uint32_t vpm_read_offset;
521
522        uint32_t program_id;
523        uint32_t variant_id;
524};
525
/* Special nir_load_input intrinsic index for loading the current TLB
 * destination color.
 */
#define VC4_NIR_TLB_COLOR_READ_INPUT		2000000000

/* NOTE(review): presumably the matching special output index for the
 * multisample mask -- confirm against the NIR lowering passes.  It has the
 * same numeric value as the input index above.
 */
#define VC4_NIR_MS_MASK_OUTPUT			2000000000
532
533struct vc4_compile *qir_compile_init(void);
534void qir_compile_destroy(struct vc4_compile *c);
535struct qblock *qir_new_block(struct vc4_compile *c);
536void qir_set_emit_block(struct vc4_compile *c, struct qblock *block);
537void qir_link_blocks(struct qblock *predecessor, struct qblock *successor);
538struct qblock *qir_entry_block(struct vc4_compile *c);
539struct qblock *qir_exit_block(struct vc4_compile *c);
540struct qinst *qir_inst(enum qop op, struct qreg dst,
541                       struct qreg src0, struct qreg src1);
542struct qinst *qir_inst4(enum qop op, struct qreg dst,
543                        struct qreg a,
544                        struct qreg b,
545                        struct qreg c,
546                        struct qreg d);
547void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
548struct qreg qir_uniform(struct vc4_compile *c,
549                        enum quniform_contents contents,
550                        uint32_t data);
551void qir_schedule_instructions(struct vc4_compile *c);
552void qir_reorder_uniforms(struct vc4_compile *c);
553void qir_emit_uniform_stream_resets(struct vc4_compile *c);
554
555struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);
556struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
557
558struct qreg qir_get_temp(struct vc4_compile *c);
559void qir_calculate_live_intervals(struct vc4_compile *c);
560int qir_get_op_nsrc(enum qop qop);
561bool qir_reg_equals(struct qreg a, struct qreg b);
562bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
563bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
564bool qir_is_mul(struct qinst *inst);
565bool qir_is_raw_mov(struct qinst *inst);
566bool qir_is_tex(struct qinst *inst);
567bool qir_is_float_input(struct qinst *inst);
568bool qir_depends_on_flags(struct qinst *inst);
569bool qir_writes_r4(struct qinst *inst);
570struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
571uint8_t qir_channels_written(struct qinst *inst);
572
573void qir_dump(struct vc4_compile *c);
574void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
575const char *qir_get_stage_name(enum qstage stage);
576
577void qir_validate(struct vc4_compile *c);
578
579void qir_optimize(struct vc4_compile *c);
580bool qir_opt_algebraic(struct vc4_compile *c);
581bool qir_opt_constant_folding(struct vc4_compile *c);
582bool qir_opt_copy_propagation(struct vc4_compile *c);
583bool qir_opt_dead_code(struct vc4_compile *c);
584bool qir_opt_peephole_sf(struct vc4_compile *c);
585bool qir_opt_small_immediates(struct vc4_compile *c);
586bool qir_opt_vpm(struct vc4_compile *c);
587void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c);
588void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c);
589nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
590                                          nir_ssa_def **srcs, int swiz);
591void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c);
592void qir_lower_uniforms(struct vc4_compile *c);
593
594uint32_t qpu_schedule_instructions(struct vc4_compile *c);
595
596void qir_SF(struct vc4_compile *c, struct qreg src);
597
598static inline struct qreg
599qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
600{
601        return qir_uniform(c, QUNIFORM_CONSTANT, ui);
602}
603
604static inline struct qreg
605qir_uniform_f(struct vc4_compile *c, float f)
606{
607        return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
608}
609
/*
 * Defines two inline emit helpers for a QIR op that takes no sources:
 *
 *   qir_<name>(c)            - builds the instruction with c->undef as its
 *                              destination, emits it through qir_emit_def()
 *                              and returns the struct qreg that hands back.
 *   qir_<name>_dest(c, dest) - builds the instruction with an explicit
 *                              destination, emits it through
 *                              qir_emit_nondef() and returns the qinst.
 *
 * Both source slots are filled with c->undef.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef,            \
                                        c->undef, c->undef));            \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest)               \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest,             \
                                           c->undef, c->undef));         \
}
623
/*
 * Defines two inline emit helpers for a QIR op that takes one source:
 *
 *   qir_<name>(c, a)            - builds the instruction with c->undef as
 *                                 its destination, emits it through
 *                                 qir_emit_def() and returns the resulting
 *                                 struct qreg.
 *   qir_<name>_dest(c, dest, a) - builds the instruction with an explicit
 *                                 destination, emits it through
 *                                 qir_emit_nondef() and returns the qinst.
 *
 * The unused second source slot is filled with c->undef.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef,            \
                                        a, c->undef));                   \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a)                                         \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a,          \
                                           c->undef));                   \
}
638
/*
 * Defines two inline emit helpers for a QIR op that takes two sources:
 *
 *   qir_<name>(c, a, b)            - builds the instruction with c->undef
 *                                    as its destination, emits it through
 *                                    qir_emit_def() and returns the
 *                                    resulting struct qreg.
 *   qir_<name>_dest(c, dest, a, b) - builds the instruction with an
 *                                    explicit destination, emits it through
 *                                    qir_emit_nondef() and returns the
 *                                    qinst.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef, a, b));    \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a, struct qreg b)                          \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, b));     \
}
651
/*
 * Defines qir_<name>(c, a): an emit helper for a one-source QIR op that
 * produces no value.  The destination and the unused second source are
 * c->undef; the instruction is emitted through qir_emit_nondef() and the
 * qinst is returned.
 */
#define QIR_NODST_1(name)                                               \
static inline struct qinst *                                            \
qir_##name(struct vc4_compile *c, struct qreg a)                        \
{                                                                       \
        return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef,        \
                                           a, c->undef));               \
}
659
/*
 * Defines qir_<name>(c, a, b): an emit helper for a two-source QIR op that
 * produces no value.  The destination is c->undef; the instruction is
 * emitted through qir_emit_nondef() and the qinst is returned.
 */
#define QIR_NODST_2(name)                                               \
static inline struct qinst *                                            \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
{                                                                       \
        return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef,        \
                                           a, b));                      \
}
667
/*
 * Defines qir_<name>(c): returns the temp holding payload value <name>,
 * creating it on first use.
 *
 * The result is cached in c->payload_<name>; a file other than QFILE_NULL
 * means it has already been created.  On the first call a new temp is
 * taken from qir_get_temp(), a QOP_<name> instruction writing it is linked
 * into the entry block's instruction list with list_add() (not emitted
 * into the current block), and the instruction is recorded in c->defs[]
 * for that temp.
 *
 * NOTE(review): list_add() presumably inserts at the head of the list,
 * i.e. ahead of everything else in the entry block -- confirm in
 * util/list.h.
 */
#define QIR_PAYLOAD(name)                                                \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg *payload = &c->payload_##name;                       \
        if (payload->file != QFILE_NULL)                                 \
                return *payload;                                         \
        *payload = qir_get_temp(c);                                      \
        struct qinst *inst = qir_inst(QOP_##name, *payload,              \
                                      c->undef, c->undef);               \
        struct qblock *entry = qir_entry_block(c);                       \
        list_add(&inst->link, &entry->instructions);                     \
        c->defs[payload->index] = inst;                                  \
        return *payload;                                                 \
}
683
684QIR_ALU1(MOV)
685QIR_ALU1(FMOV)
686QIR_ALU1(MMOV)
687QIR_ALU2(FADD)
688QIR_ALU2(FSUB)
689QIR_ALU2(FMUL)
690QIR_ALU2(V8MULD)
691QIR_ALU2(V8MIN)
692QIR_ALU2(V8MAX)
693QIR_ALU2(V8ADDS)
694QIR_ALU2(V8SUBS)
695QIR_ALU2(MUL24)
696QIR_ALU2(FMIN)
697QIR_ALU2(FMAX)
698QIR_ALU2(FMINABS)
699QIR_ALU2(FMAXABS)
700QIR_ALU1(FTOI)
701QIR_ALU1(ITOF)
702
703QIR_ALU2(ADD)
704QIR_ALU2(SUB)
705QIR_ALU2(SHL)
706QIR_ALU2(SHR)
707QIR_ALU2(ASR)
708QIR_ALU2(MIN)
709QIR_ALU2(MAX)
710QIR_ALU2(AND)
711QIR_ALU2(OR)
712QIR_ALU2(XOR)
713QIR_ALU1(NOT)
714
715QIR_ALU1(RCP)
716QIR_ALU1(RSQ)
717QIR_ALU1(EXP2)
718QIR_ALU1(LOG2)
719QIR_ALU1(VARY_ADD_C)
720QIR_NODST_2(TEX_S)
721QIR_NODST_2(TEX_T)
722QIR_NODST_2(TEX_R)
723QIR_NODST_2(TEX_B)
724QIR_NODST_2(TEX_DIRECT)
725QIR_PAYLOAD(FRAG_Z)
726QIR_PAYLOAD(FRAG_W)
727QIR_ALU0(TEX_RESULT)
728QIR_ALU0(TLB_COLOR_READ)
729QIR_NODST_1(MS_MASK)
730
731static inline struct qreg
732qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
733{
734        struct qreg t = qir_get_temp(c);
735        struct qinst *a = qir_MOV_dest(c, t, src0);
736        struct qinst *b = qir_MOV_dest(c, t, src1);
737        a->cond = cond;
738        b->cond = qpu_cond_complement(cond);
739        return t;
740}
741
742static inline struct qreg
743qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
744{
745        struct qreg t = qir_FMOV(c, src);
746        c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
747        return t;
748}
749
750static inline struct qreg
751qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
752{
753        struct qreg t = qir_MOV(c, src);
754        c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
755        return t;
756}
757
758static inline struct qreg
759qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
760{
761        struct qreg t = qir_FMOV(c, src);
762        c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
763        return t;
764}
765
766static inline struct qreg
767qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
768{
769        struct qreg t = qir_MOV(c, src);
770        c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
771        return t;
772}
773
774static inline void
775qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
776{
777        assert(!dest.pack);
778        dest.pack = QPU_PACK_MUL_8A + chan;
779        qir_emit_nondef(c, qir_inst(QOP_MMOV, dest, val, c->undef));
780}
781
782static inline struct qreg
783qir_PACK_8888_F(struct vc4_compile *c, struct qreg val)
784{
785        struct qreg dest = qir_MMOV(c, val);
786        c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888;
787        return dest;
788}
789
790static inline struct qreg
791qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
792{
793        return qir_EXP2(c, qir_FMUL(c,
794                                    y,
795                                    qir_LOG2(c, x)));
796}
797
798static inline void
799qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
800{
801        qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val);
802}
803
804static inline struct qreg
805qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)
806{
807        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM, c->undef,
808                                        qir_reg(QFILE_LOAD_IMM, val), c->undef));
809}
810
811static inline struct qreg
812qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)
813{
814        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,
815                                        qir_reg(QFILE_LOAD_IMM, val),
816                                        c->undef));
817}
818
819static inline struct qreg
820qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
821{
822        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,
823                                        qir_reg(QFILE_LOAD_IMM, val),
824                                        c->undef));
825}
826
827/** Shifts the multiply output to the right by rot channels */
828static inline struct qreg
829qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
830{
831        return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef,
832                                        val,
833                                        qir_reg(QFILE_LOAD_IMM,
834                                                QPU_SMALL_IMM_MUL_ROT + rot)));
835}
836
837static inline void
838qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
839             struct qreg dest, struct qreg src)
840{
841        qir_MOV_dest(c, dest, src)->cond = cond;
842}
843
844static inline struct qinst *
845qir_BRANCH(struct vc4_compile *c, uint8_t cond)
846{
847        struct qinst *inst = qir_inst(QOP_BRANCH, c->undef, c->undef, c->undef);
848        inst->cond = cond;
849        qir_emit_nondef(c, inst);
850        return inst;
851}
852
/* Iterates `block` (declared by the macro as a struct qblock *) over
 * c->blocks via list_for_each_entry().  `c` is parenthesized so that any
 * pointer expression may be passed.
 */
#define qir_for_each_block(block, c)                                    \
        list_for_each_entry(struct qblock, block, &(c)->blocks, link)
855
/* Iterates `block` over c->blocks via list_for_each_entry_rev().  `c` is
 * parenthesized so that any pointer expression may be passed.
 */
#define qir_for_each_block_rev(block, c)                                \
        list_for_each_entry_rev(struct qblock, block, &(c)->blocks, link)
858
/* Loop over the non-NULL members of the successors array.
 *
 * `succ` is declared by the macro.  Iteration starts at successors[0] and
 * stops at the first NULL; after visiting successors[0] it moves on to
 * successors[1] unless the two are the same block, so a block listed in
 * both slots is visited once.  `block` is evaluated several times and is
 * parenthesized so that expressions such as `&blk` or `arr + i` expand
 * correctly.
 */
#define qir_for_each_successor(succ, block)                             \
        for (struct qblock *succ = (block)->successors[0];              \
             succ != NULL;                                              \
             succ = (succ == (block)->successors[1] ? NULL :            \
                     (block)->successors[1]))
865
/* Iterates `inst` (declared by the macro as a struct qinst *) over
 * block->instructions via list_for_each_entry().  `block` is parenthesized
 * so that any pointer expression may be passed.
 */
#define qir_for_each_inst(inst, block)                                  \
        list_for_each_entry(struct qinst, inst, &(block)->instructions, link)
868
/* Iterates `inst` over block->instructions via list_for_each_entry_rev().
 * `block` is parenthesized so that any pointer expression may be passed.
 */
#define qir_for_each_inst_rev(inst, block)                              \
        list_for_each_entry_rev(struct qinst, inst, &(block)->instructions, link)
871
/* Iterates `inst` over block->instructions via list_for_each_entry_safe().
 * NOTE(review): the _safe variant presumably tolerates removing `inst`
 * during iteration -- confirm in util/list.h.  `block` is parenthesized so
 * that any pointer expression may be passed.
 */
#define qir_for_each_inst_safe(inst, block)                             \
        list_for_each_entry_safe(struct qinst, inst, &(block)->instructions, link)
874
/* Iterates `inst` over every instruction of every block of `c`, block by
 * block, by nesting qir_for_each_inst() inside qir_for_each_block().
 *
 * The macro declares a hidden loop variable named `_block`; the loop body
 * can see it, and nesting two uses of this macro shadows the outer one.
 * Because this expands to two nested loops, a `break` in the body only
 * leaves the current block's instruction loop.
 */
#define qir_for_each_inst_inorder(inst, c)                              \
        qir_for_each_block(_block, (c))                                 \
                qir_for_each_inst(inst, _block)
878
879#endif /* VC4_QIR_H */
880