18b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
28b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
38b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/*
48b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
58b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
68b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * Permission is hereby granted, free of charge, to any person obtaining a
78b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * copy of this software and associated documentation files (the "Software"),
88b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * to deal in the Software without restriction, including without limitation
98b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * the rights to use, copy, modify, merge, publish, distribute, sublicense,
108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * and/or sell copies of the Software, and to permit persons to whom the
118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * Software is furnished to do so, subject to the following conditions:
128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * The above copyright notice and this permission notice (including the next
148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * paragraph) shall be included in all copies or substantial portions of the
158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * Software.
168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * SOFTWARE.
248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * Authors:
268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *    Rob Clark <robclark@freedesktop.org>
278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include <stdarg.h>
308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "pipe/p_state.h"
328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "util/u_string.h"
338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "util/u_memory.h"
348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "util/u_inlines.h"
358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "freedreno_util.h"
378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "ir3_compiler.h"
398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "ir3_shader.h"
40efbf14e8936384ab1d243afbe3fa9bb0f40e3898Rob Clark#include "ir3_nir.h"
418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "instr-a3xx.h"
438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark#include "ir3.h"
448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstruct ir3_compile {
47c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	struct ir3_compiler *compiler;
48c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark
498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct nir_shader *s;
508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3 *ir;
528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_shader_variant *so;
538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
54457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_block *block;      /* the current block */
55457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_block *in_block;   /* block created for shader inputs */
56457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
57457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	nir_function_impl *impl;
588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* For fragment shaders, from the hw perspective the only
608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * actual input is r0.xy position register passed to bary.f.
618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * But TGSI doesn't know that, it still declares things as
628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * IN[] registers.  So we do all the input tracking normally
638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * and fix things up after compile_instructions()
648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *
658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * NOTE that frag_pos is the hardware position (possibly it
668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * is actually an index or tag or some such.. it is *not*
678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * values that can be directly used for gl_FragCoord..)
688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* For vertex shaders, keep track of the system values sources */
728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *vertex_id, *basevertex, *instance_id;
738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* mapping from nir_register to defining instruction: */
758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct hash_table *def_ht;
768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
774cf4006674bd7c507688316e2033d77066c45c90Rob Clark	unsigned num_arrays;
784cf4006674bd7c507688316e2033d77066c45c90Rob Clark
798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* a common pattern for indirect addressing is to request the
808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * same address register multiple times.  To avoid generating
818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * duplicate instruction sequences (which our backend does not
828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * try to clean up, since that should be done as the NIR stage)
838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * we cache the address value generated for a given src value:
848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct hash_table *addr_ht;
868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
87457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* maps nir_block to ir3_block, mostly for the purposes of
88457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	 * figuring out the blocks successors
89457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	 */
90457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct hash_table *block_ht;
91457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * so we need to use ldlv.u32 to load the varying directly:
948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	bool flat_bypass;
968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
97715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	/* on a3xx, we need to add one to # of array levels:
98715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 */
99715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	bool levels_add_one;
100715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
10143048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark	/* on a3xx, we need to scale up integer coords for isaml based
10243048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark	 * on LoD:
10343048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark	 */
10443048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark	bool unminify_coords;
10543048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark
106216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark	/* on a4xx, for array textures we need to add 0.5 to the array
107216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark	 * index coordinate:
108216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark	 */
109216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark	bool array_index_add_half;
110216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark
111adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	/* on a4xx, bitmask of samplers which need astc+srgb workaround: */
112adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	unsigned astc_srgb;
113adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
114adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	unsigned max_texture_index;
1158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* set if we encounter something we can't handle yet, so we
1178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * can bail cleanly and fallback to TGSI compiler f/e
1188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
1198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	bool error;
1208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark};
1218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
122fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark/* gpu pointer size in units of 32bit registers/slots */
123fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clarkstatic unsigned pointer_size(struct ir3_compile *ctx)
124fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark{
125fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	return (ctx->compiler->gpu_id >= 500) ? 2 : 1;
126fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark}
1278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
128457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
129457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
130457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_compile *
133c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clarkcompile_init(struct ir3_compiler *compiler,
13474135f804a4f18040a0a62664df67d35c8090d1dRob Clark		struct ir3_shader_variant *so)
1358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
1368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
1378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
138c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	if (compiler->gpu_id >= 400) {
1398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* need special handling for "flat" */
1408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->flat_bypass = true;
141715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		ctx->levels_add_one = false;
14243048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark		ctx->unminify_coords = false;
143216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark		ctx->array_index_add_half = true;
144adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
145adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		if (so->type == SHADER_VERTEX)
146adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			ctx->astc_srgb = so->key.vastc_srgb;
147adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		else if (so->type == SHADER_FRAGMENT)
148adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			ctx->astc_srgb = so->key.fastc_srgb;
149adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	} else {
1518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* no special handling for "flat" */
1528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->flat_bypass = false;
153715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		ctx->levels_add_one = true;
15443048c7093c367897fbcbb3ca8580e9c122cd6f6Rob Clark		ctx->unminify_coords = true;
155216225ce57de3987aefc2c052eebe54b9c312bc0Rob Clark		ctx->array_index_add_half = false;
1568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
1578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
158c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ctx->compiler = compiler;
1598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ctx->ir = so->ir;
1608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ctx->so = so;
1618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ctx->def_ht = _mesa_hash_table_create(ctx,
1628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			_mesa_hash_pointer, _mesa_key_pointer_equal);
163457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ctx->block_ht = _mesa_hash_table_create(ctx,
164457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			_mesa_hash_pointer, _mesa_key_pointer_equal);
1658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
16674135f804a4f18040a0a62664df67d35c8090d1dRob Clark	/* TODO: maybe generate some sort of bitmask of what key
16774135f804a4f18040a0a62664df67d35c8090d1dRob Clark	 * lowers vs what shader has (ie. no need to lower
16874135f804a4f18040a0a62664df67d35c8090d1dRob Clark	 * texture clamp lowering if no texture sample instrs)..
16974135f804a4f18040a0a62664df67d35c8090d1dRob Clark	 * although should be done further up the stack to avoid
17074135f804a4f18040a0a62664df67d35c8090d1dRob Clark	 * creating duplicate variants..
17174135f804a4f18040a0a62664df67d35c8090d1dRob Clark	 */
17274135f804a4f18040a0a62664df67d35c8090d1dRob Clark
17374135f804a4f18040a0a62664df67d35c8090d1dRob Clark	if (ir3_key_lowers_nir(&so->key)) {
17474135f804a4f18040a0a62664df67d35c8090d1dRob Clark		nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
17574135f804a4f18040a0a62664df67d35c8090d1dRob Clark		ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
17674135f804a4f18040a0a62664df67d35c8090d1dRob Clark	} else {
17774135f804a4f18040a0a62664df67d35c8090d1dRob Clark		/* fast-path for shader key that lowers nothing in NIR: */
17874135f804a4f18040a0a62664df67d35c8090d1dRob Clark		ctx->s = so->shader->nir;
17974135f804a4f18040a0a62664df67d35c8090d1dRob Clark	}
18074135f804a4f18040a0a62664df67d35c8090d1dRob Clark
18174135f804a4f18040a0a62664df67d35c8090d1dRob Clark	if (fd_mesa_debug & FD_DBG_DISASM) {
18274135f804a4f18040a0a62664df67d35c8090d1dRob Clark		DBG("dump nir%dv%d: type=%d, k={bp=%u,cts=%u,hp=%u}",
18374135f804a4f18040a0a62664df67d35c8090d1dRob Clark			so->shader->id, so->id, so->type,
18474135f804a4f18040a0a62664df67d35c8090d1dRob Clark			so->key.binning_pass, so->key.color_two_side,
18574135f804a4f18040a0a62664df67d35c8090d1dRob Clark			so->key.half_precision);
18674135f804a4f18040a0a62664df67d35c8090d1dRob Clark		nir_print_shader(ctx->s, stdout);
18774135f804a4f18040a0a62664df67d35c8090d1dRob Clark	}
1888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
189fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	so->num_uniforms = ctx->s->num_uniforms;
190fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	so->num_ubos = ctx->s->info->num_ubos;
1911b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark
192fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	/* Layout of constant registers, each section aligned to vec4.  Note
193fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 * that pointer size (ubo, etc) changes depending on generation.
194a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	 *
195fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 *    user consts
196fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 *    UBO addresses
197a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	 *    if (vertex shader) {
198fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 *        driver params (IR3_DP_*)
199fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 *        if (stream_output.num_outputs > 0)
200fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 *           stream-out addresses
201a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	 *    }
202fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 *    immediates
203a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	 *
204fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 * Immediates go last mostly because they are inserted in the CP pass
205fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	 * after the nir -> ir3 frontend.
206a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	 */
207fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	unsigned constoff = align(ctx->s->num_uniforms, 4);
208fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	unsigned ptrsz = pointer_size(ctx);
2091b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark
210fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	memset(&so->constbase, ~0, sizeof(so->constbase));
211fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark
212fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	if (so->num_ubos > 0) {
213fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark		so->constbase.ubo = constoff;
214fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark		constoff += align(ctx->s->info->num_ubos * ptrsz, 4) / 4;
215fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	}
2168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
217a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	if (so->type == SHADER_VERTEX) {
218fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark		so->constbase.driver_param = constoff;
219fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark		constoff += align(IR3_DP_COUNT, 4) / 4;
220fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark
221d10c5a24818b39585acfa60ceb6dbbae22b5be09Rob Clark		if ((compiler->gpu_id < 500) &&
222d10c5a24818b39585acfa60ceb6dbbae22b5be09Rob Clark				so->shader->stream_output.num_outputs > 0) {
223fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark			so->constbase.tfbo = constoff;
224fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark			constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4;
225fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark		}
226a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	}
227a240748de52f2e469e91b60d29ae872828a594d7Rob Clark
228fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	so->constbase.immediate = constoff;
229fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark
2308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return ctx;
2318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
2328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
2348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkcompile_error(struct ir3_compile *ctx, const char *format, ...)
2358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
2368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	va_list ap;
2378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	va_start(ap, format);
2388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	_debug_vprintf(format, ap);
2398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	va_end(ap);
2408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	nir_print_shader(ctx->s, stdout);
2418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ctx->error = true;
2424cf4006674bd7c507688316e2033d77066c45c90Rob Clark	debug_assert(0);
2438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
2448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
/* Assertion that reports through compile_error() instead of aborting
 * directly: when cond is false, the failure (including the text of the
 * condition) is reported against ctx.
 *
 * The condition text is handed over as a "%s" argument rather than
 * being pasted into the format string, so a '%' inside the condition
 * (eg. "a % b == 0") is printed literally instead of being parsed as a
 * conversion specifier.
 */
#define compile_assert(ctx, cond) do { \
		if (!(cond)) \
			compile_error((ctx), "failed assert: %s\n", #cond); \
	} while (0)
2488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
/* Release a context created by compile_init().  The context is handed
 * to ralloc_free(); the hash tables and other allocations made with
 * ctx as their ralloc parent hang off of it.
 */
static void
compile_free(struct ir3_compile *ctx)
{
	ralloc_free(ctx);
}
2548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2554cf4006674bd7c507688316e2033d77066c45c90Rob Clarkstatic void
2564cf4006674bd7c507688316e2033d77066c45c90Rob Clarkdeclare_var(struct ir3_compile *ctx, nir_variable *var)
2574cf4006674bd7c507688316e2033d77066c45c90Rob Clark{
2584cf4006674bd7c507688316e2033d77066c45c90Rob Clark	unsigned length = glsl_get_length(var->type) * 4;  /* always vec4, at least with ttn */
259dfc001dccc7a29b1acf8d1ecf8072223aa98bc1bRob Clark	struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
260fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	arr->id = ++ctx->num_arrays;
2614cf4006674bd7c507688316e2033d77066c45c90Rob Clark	arr->length = length;
262fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	arr->var = var;
263fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	list_addtail(&arr->node, &ctx->ir->array_list);
2644cf4006674bd7c507688316e2033d77066c45c90Rob Clark}
2654cf4006674bd7c507688316e2033d77066c45c90Rob Clark
266fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clarkstatic struct ir3_array *
2674cf4006674bd7c507688316e2033d77066c45c90Rob Clarkget_var(struct ir3_compile *ctx, nir_variable *var)
2684cf4006674bd7c507688316e2033d77066c45c90Rob Clark{
269fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
270fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		if (arr->var == var)
271fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark			return arr;
272457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	}
273fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	compile_error(ctx, "bogus var: %s\n", var->name);
274fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	return NULL;
2754cf4006674bd7c507688316e2033d77066c45c90Rob Clark}
2764cf4006674bd7c507688316e2033d77066c45c90Rob Clark
2778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/* allocate a n element value array (to be populated by caller) and
2788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * insert in def_ht
2798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
2808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction **
2818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark__get_dst(struct ir3_compile *ctx, void *key, unsigned n)
2828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
2838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction **value =
2848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
2858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	_mesa_hash_table_insert(ctx->def_ht, key, value);
2868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return value;
2878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
2888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction **
2908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkget_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
2918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
292fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	compile_assert(ctx, dst->is_ssa);
2938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (dst->is_ssa) {
2948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		return __get_dst(ctx, &dst->ssa, n);
2958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	} else {
2968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		return __get_dst(ctx, dst->reg.reg, n);
2978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
2988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
2998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction **
3018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkget_dst_ssa(struct ir3_compile *ctx, nir_ssa_def *dst, unsigned n)
3028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
3038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return __get_dst(ctx, dst, n);
3048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
3058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
30678ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clarkstatic struct ir3_instruction * const *
3078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkget_src(struct ir3_compile *ctx, nir_src *src)
3088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
3098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct hash_entry *entry;
310fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	compile_assert(ctx, src->is_ssa);
3118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (src->is_ssa) {
3128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
3138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	} else {
3148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		entry = _mesa_hash_table_search(ctx->def_ht, src->reg.reg);
3158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
3168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	compile_assert(ctx, entry);
3178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return entry->data;
3188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
3198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
3218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkcreate_immed(struct ir3_block *block, uint32_t val)
3228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
3238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *mov;
3248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
32538ae05a340bdf526d5da62159223ad9938fea36aRob Clark	mov = ir3_instr_create(block, OPC_MOV);
3268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.src_type = TYPE_U32;
3278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.dst_type = TYPE_U32;
3288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(mov, 0, 0);
3298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val;
3308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return mov;
3328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
3338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
3358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkcreate_addr(struct ir3_block *block, struct ir3_instruction *src)
3368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
3378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *instr, *immed;
3388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* TODO in at least some cases, the backend could probably be
3408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * made clever enough to propagate IR3_REG_HALF..
3418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
3428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
3438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr->regs[0]->flags |= IR3_REG_HALF;
3448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	immed = create_immed(block, 2);
3468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	immed->regs[0]->flags |= IR3_REG_HALF;
3478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr = ir3_SHL_B(block, instr, 0, immed, 0);
3498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr->regs[0]->flags |= IR3_REG_HALF;
3508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr->regs[1]->flags |= IR3_REG_HALF;
3518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr = ir3_MOV(block, instr, TYPE_S16);
353d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark	instr->regs[0]->num = regid(REG_A0, 0);
354d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark	instr->regs[0]->flags |= IR3_REG_HALF;
3558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr->regs[1]->flags |= IR3_REG_HALF;
3568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return instr;
3588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
3598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/* caches addr values to avoid generating multiple cov/shl/mova
3618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * sequences for each use of a given NIR level src as address
3628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
3638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
3648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkget_addr(struct ir3_compile *ctx, struct ir3_instruction *src)
3658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
3668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *addr;
3678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
368ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark	if (!ctx->addr_ht) {
369ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark		ctx->addr_ht = _mesa_hash_table_create(ctx,
370ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark				_mesa_hash_pointer, _mesa_key_pointer_equal);
371ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark	} else {
372ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark		struct hash_entry *entry;
373ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark		entry = _mesa_hash_table_search(ctx->addr_ht, src);
374ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark		if (entry)
375ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark			return entry->data;
376ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark	}
377ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark
3788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	addr = create_addr(ctx->block, src);
3798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	_mesa_hash_table_insert(ctx->addr_ht, src, addr);
3808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return addr;
3828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
3838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
3848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
385457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkget_predicate(struct ir3_compile *ctx, struct ir3_instruction *src)
386457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
387457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_block *b = ctx->block;
388457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_instruction *cond;
389457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
390457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* NOTE: only cmps.*.* can write p0.x: */
391457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
392457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	cond->cat2.condition = IR3_COND_NE;
393457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
394457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* condition always goes in predicate register: */
395457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	cond->regs[0]->num = regid(REG_P0, 0);
396457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
397457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	return cond;
398457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
399457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
400457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic struct ir3_instruction *
4014cf4006674bd7c507688316e2033d77066c45c90Rob Clarkcreate_uniform(struct ir3_compile *ctx, unsigned n)
4028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
4038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *mov;
4048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
40538ae05a340bdf526d5da62159223ad9938fea36aRob Clark	mov = ir3_instr_create(ctx->block, OPC_MOV);
4068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* TODO get types right? */
4078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.src_type = TYPE_F32;
4088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.dst_type = TYPE_F32;
4098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(mov, 0, 0);
4108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(mov, n, IR3_REG_CONST);
4118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
4128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return mov;
4138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
4148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
4158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
4166a33c5c0dffce136bdc95daa2db2d3e9d3c1741fRob Clarkcreate_uniform_indirect(struct ir3_compile *ctx, int n,
4178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		struct ir3_instruction *address)
4188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
4198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *mov;
4208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
42138ae05a340bdf526d5da62159223ad9938fea36aRob Clark	mov = ir3_instr_create(ctx->block, OPC_MOV);
4228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.src_type = TYPE_U32;
4238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.dst_type = TYPE_U32;
4248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(mov, 0, 0);
425fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
4268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
4276b9f5cd5f7b25e9e03104fe279df74817f69fe87Rob Clark	ir3_instr_set_address(mov, address);
4284cf4006674bd7c507688316e2033d77066c45c90Rob Clark
4298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return mov;
4308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
4318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
4328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
4334cf4006674bd7c507688316e2033d77066c45c90Rob Clarkcreate_collect(struct ir3_block *block, struct ir3_instruction **arr,
4344cf4006674bd7c507688316e2033d77066c45c90Rob Clark		unsigned arrsz)
4358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
4364cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_instruction *collect;
4378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
4386e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark	if (arrsz == 0)
4396e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		return NULL;
4406e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark
44138ae05a340bdf526d5da62159223ad9938fea36aRob Clark	collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
442c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ir3_reg_create(collect, 0, 0);     /* dst */
4438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (unsigned i = 0; i < arrsz; i++)
4448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i];
4458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
4464cf4006674bd7c507688316e2033d77066c45c90Rob Clark	return collect;
4474cf4006674bd7c507688316e2033d77066c45c90Rob Clark}
4484cf4006674bd7c507688316e2033d77066c45c90Rob Clark
4494cf4006674bd7c507688316e2033d77066c45c90Rob Clarkstatic struct ir3_instruction *
4506a33c5c0dffce136bdc95daa2db2d3e9d3c1741fRob Clarkcreate_indirect_load(struct ir3_compile *ctx, unsigned arrsz, int n,
4514cf4006674bd7c507688316e2033d77066c45c90Rob Clark		struct ir3_instruction *address, struct ir3_instruction *collect)
4524cf4006674bd7c507688316e2033d77066c45c90Rob Clark{
4534cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_block *block = ctx->block;
4544cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_instruction *mov;
4554cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_register *src;
4564cf4006674bd7c507688316e2033d77066c45c90Rob Clark
45738ae05a340bdf526d5da62159223ad9938fea36aRob Clark	mov = ir3_instr_create(block, OPC_MOV);
4588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.src_type = TYPE_U32;
4598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	mov->cat1.dst_type = TYPE_U32;
4608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(mov, 0, 0);
4618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
4628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	src->instr = collect;
4638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	src->size  = arrsz;
464fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	src->array.offset = n;
4654cf4006674bd7c507688316e2033d77066c45c90Rob Clark
4666b9f5cd5f7b25e9e03104fe279df74817f69fe87Rob Clark	ir3_instr_set_address(mov, address);
4674cf4006674bd7c507688316e2033d77066c45c90Rob Clark
4684cf4006674bd7c507688316e2033d77066c45c90Rob Clark	return mov;
4694cf4006674bd7c507688316e2033d77066c45c90Rob Clark}
4704cf4006674bd7c507688316e2033d77066c45c90Rob Clark
471fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark/* relative (indirect) if address!=NULL */
472fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clarkstatic struct ir3_instruction *
4736a33c5c0dffce136bdc95daa2db2d3e9d3c1741fRob Clarkcreate_var_load(struct ir3_compile *ctx, struct ir3_array *arr, int n,
474fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		struct ir3_instruction *address)
475fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark{
476fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	struct ir3_block *block = ctx->block;
477fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	struct ir3_instruction *mov;
478fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	struct ir3_register *src;
479fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
48038ae05a340bdf526d5da62159223ad9938fea36aRob Clark	mov = ir3_instr_create(block, OPC_MOV);
481fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	mov->cat1.src_type = TYPE_U32;
482fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	mov->cat1.dst_type = TYPE_U32;
483fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	ir3_reg_create(mov, 0, 0);
484fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
485fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark			COND(address, IR3_REG_RELATIV));
486fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	src->instr = arr->last_write;
487fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	src->size  = arr->length;
488fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	src->array.id = arr->id;
489fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	src->array.offset = n;
490fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
491fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	if (address)
492fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		ir3_instr_set_address(mov, address);
493fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
494fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	arr->last_access = mov;
495fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
496fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	return mov;
497fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark}
498fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
499fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark/* relative (indirect) if address!=NULL */
5004cf4006674bd7c507688316e2033d77066c45c90Rob Clarkstatic struct ir3_instruction *
5016a33c5c0dffce136bdc95daa2db2d3e9d3c1741fRob Clarkcreate_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n,
502fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		struct ir3_instruction *src, struct ir3_instruction *address)
5034cf4006674bd7c507688316e2033d77066c45c90Rob Clark{
5044cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_block *block = ctx->block;
5054cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_instruction *mov;
5064cf4006674bd7c507688316e2033d77066c45c90Rob Clark	struct ir3_register *dst;
5074cf4006674bd7c507688316e2033d77066c45c90Rob Clark
50838ae05a340bdf526d5da62159223ad9938fea36aRob Clark	mov = ir3_instr_create(block, OPC_MOV);
5094cf4006674bd7c507688316e2033d77066c45c90Rob Clark	mov->cat1.src_type = TYPE_U32;
5104cf4006674bd7c507688316e2033d77066c45c90Rob Clark	mov->cat1.dst_type = TYPE_U32;
511fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
512fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark			COND(address, IR3_REG_RELATIV));
513fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	dst->instr = arr->last_access;
514fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	dst->size  = arr->length;
515fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	dst->array.id = arr->id;
516fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	dst->array.offset = n;
5174cf4006674bd7c507688316e2033d77066c45c90Rob Clark	ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
5184cf4006674bd7c507688316e2033d77066c45c90Rob Clark
5196b9f5cd5f7b25e9e03104fe279df74817f69fe87Rob Clark	ir3_instr_set_address(mov, address);
5208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
521fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	arr->last_write = arr->last_access = mov;
522fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
5238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return mov;
5248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
5258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
527810763deb514c3fec41c3e95761de34e6211d291Rob Clarkcreate_input(struct ir3_block *block, unsigned n)
5288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
5298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *in;
5308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
53138ae05a340bdf526d5da62159223ad9938fea36aRob Clark	in = ir3_instr_create(block, OPC_META_INPUT);
5328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	in->inout.block = block;
5338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ir3_reg_create(in, n, 0);
5348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return in;
5368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
5378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
53957fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clarkcreate_frag_input(struct ir3_compile *ctx, bool use_ldlv)
5408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
5418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_block *block = ctx->block;
5428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *instr;
54357fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	/* actual inloc is assigned and fixed up later: */
54457fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	struct ir3_instruction *inloc = create_immed(block, 0);
5458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (use_ldlv) {
5478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
5488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr->cat6.type = TYPE_U32;
5498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr->cat6.iim_val = 1;
5508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	} else {
5518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr = ir3_BARY_F(block, inloc, 0, ctx->frag_pos, 0);
5528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr->regs[2]->wrmask = 0x3;
5538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
5548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return instr;
5568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
5578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
5598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkcreate_frag_coord(struct ir3_compile *ctx, unsigned comp)
5608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
5618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_block *block = ctx->block;
5628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *instr;
5638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	compile_assert(ctx, !ctx->frag_coord[comp]);
5658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
566810763deb514c3fec41c3e95761de34e6211d291Rob Clark	ctx->frag_coord[comp] = create_input(ctx->block, 0);
5678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	switch (comp) {
5698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case 0: /* .x */
5708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case 1: /* .y */
5718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* for frag_coord, we get unsigned values.. we need
5728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * to subtract (integer) 8 and divide by 16 (right-
5738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * shift by 4) then convert to float:
5748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *
57595e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark		 *    sub.s tmp, src, 8
5768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *    shr.b tmp, tmp, 4
5778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *    mov.u32f32 dst, tmp
5788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *
5798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 */
58095e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark		instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0,
58195e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark				create_immed(block, 8), 0);
5828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr = ir3_SHR_B(block, instr, 0,
5838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				create_immed(block, 4), 0);
5848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
5858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
5868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		return instr;
5878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case 2: /* .z */
5888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case 3: /* .w */
5898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	default:
5908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* seems that we can use these as-is: */
5918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		return ctx->frag_coord[comp];
5928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
5938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
5948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
595a240748de52f2e469e91b60d29ae872828a594d7Rob Clarkstatic struct ir3_instruction *
596a240748de52f2e469e91b60d29ae872828a594d7Rob Clarkcreate_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
597a240748de52f2e469e91b60d29ae872828a594d7Rob Clark{
598a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	/* first four vec4 sysval's reserved for UBOs: */
59991ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark	/* NOTE: dp is in scalar, but there can be >4 dp components: */
600fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	unsigned n = ctx->so->constbase.driver_param;
60191ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark	unsigned r = regid(n + dp / 4, dp % 4);
602a240748de52f2e469e91b60d29ae872828a594d7Rob Clark	return create_uniform(ctx, r);
603a240748de52f2e469e91b60d29ae872828a594d7Rob Clark}
604a240748de52f2e469e91b60d29ae872828a594d7Rob Clark
60597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark/* helper for instructions that produce multiple consecutive scalar
60697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark * outputs which need to have a split/fanout meta instruction inserted
60797e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark */
60897e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clarkstatic void
60997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clarksplit_dest(struct ir3_block *block, struct ir3_instruction **dst,
610adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		struct ir3_instruction *src, unsigned base, unsigned n)
61197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark{
61297e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	struct ir3_instruction *prev = NULL;
61366a93a0ff9aa402c37aa9d00b4489715d611b496Rob Clark	for (int i = 0, j = 0; i < n; i++) {
61438ae05a340bdf526d5da62159223ad9938fea36aRob Clark		struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
61597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		ir3_reg_create(split, 0, IR3_REG_SSA);
61697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src;
617adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		split->fo.off = i + base;
61897e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
61997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		if (prev) {
62097e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark			split->cp.left = prev;
62197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark			split->cp.left_cnt++;
62297e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark			prev->cp.right = split;
62397e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark			prev->cp.right_cnt++;
62497e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		}
62597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		prev = split;
62697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
627adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		if (src->regs[0]->wrmask & (1 << (i + base)))
62897e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark			dst[j++] = split;
62997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	}
63097e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark}
63197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
6328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/*
 * Adreno uses uint rather than having a dedicated bool type,
 * which (potentially) requires some conversion, in particular
 * when using the output of a bool instr as an int input, or vice
 * versa.
6378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
6388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *         | Adreno  |  NIR  |
6398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *  -------+---------+-------+-
6408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *   true  |    1    |  ~0   |
6418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *   false |    0    |   0   |
6428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
6438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * To convert from an adreno bool (uint) to nir, use:
6448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
6458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *    absneg.s dst, (neg)src
6468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
6478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * To convert back in the other direction:
6488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
 *    absneg.s dst, (abs)src
6508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark *
6518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * The CP step can clean up the absneg.s that cancel each other
6528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * out, and with a slight bit of extra cleverness (to recognize
6538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * the instructions which produce either a 0 or 1) can eliminate
6548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * the absneg.s's completely when an instruction that wants
6558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * 0/1 consumes the result.  For example, when a nir 'bcsel'
 * consumes the result of 'feq'.  So we should be able to get by
 * without a boolean resolve step, and without incurring any
 * extra penalty in instruction count.
6598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
6608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/* NIR bool -> native (adreno): */
6628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
6638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkir3_b2n(struct ir3_block *block, struct ir3_instruction *instr)
6648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
6658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return ir3_ABSNEG_S(block, instr, IR3_REG_SABS);
6668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
6678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/* native (adreno) -> NIR bool: */
6698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic struct ir3_instruction *
6708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkir3_n2b(struct ir3_block *block, struct ir3_instruction *instr)
6718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
6728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return ir3_ABSNEG_S(block, instr, IR3_REG_SNEG);
6738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
6748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/*
6768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * alu/sfu instructions:
6778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
6788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
6808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkemit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
6818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
6828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	const nir_op_info *info = &nir_op_infos[alu->op];
6838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction **dst, *src[info->num_inputs];
6848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_block *b = ctx->block;
6858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	dst = get_dst(ctx, &alu->dest.dest, MAX2(info->output_size, 1));
6878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* Vectors are special in that they have non-scalarized writemasks,
6898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * and just take the first swizzle channel for each argument in
6908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * order into each writemask channel.
6918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
6928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if ((alu->op == nir_op_vec2) ||
6938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			(alu->op == nir_op_vec3) ||
6948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			(alu->op == nir_op_vec4)) {
6958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		for (int i = 0; i < info->num_inputs; i++) {
6978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			nir_alu_src *asrc = &alu->src[i];
6988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
6998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			compile_assert(ctx, !asrc->abs);
7008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			compile_assert(ctx, !asrc->negate);
7018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[0]];
7034cf4006674bd7c507688316e2033d77066c45c90Rob Clark			if (!src[i])
7044cf4006674bd7c507688316e2033d77066c45c90Rob Clark				src[i] = create_immed(ctx->block, 0);
7058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			dst[i] = ir3_MOV(b, src[i], TYPE_U32);
7068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
7078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		return;
7098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
7108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* General case: We can just grab the one used channel per src. */
7128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (int i = 0; i < info->num_inputs; i++) {
7138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		unsigned chan = ffs(alu->dest.write_mask) - 1;
7148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		nir_alu_src *asrc = &alu->src[i];
7158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		compile_assert(ctx, !asrc->abs);
7178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		compile_assert(ctx, !asrc->negate);
7188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[chan]];
720b98c0262d1183d24a37272558c51678cd6a0e9ecRob Clark
721b98c0262d1183d24a37272558c51678cd6a0e9ecRob Clark		compile_assert(ctx, src[i]);
7228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
7238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	switch (alu->op) {
7258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_f2i:
7268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_S32);
7278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_f2u:
7298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_U32);
7308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_i2f:
7328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_COV(b, src[0], TYPE_S32, TYPE_F32);
7338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_u2f:
7358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_COV(b, src[0], TYPE_U32, TYPE_F32);
7368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_imov:
7388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MOV(b, src[0], TYPE_S32);
7398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7404cf4006674bd7c507688316e2033d77066c45c90Rob Clark	case nir_op_fmov:
7414cf4006674bd7c507688316e2033d77066c45c90Rob Clark		dst[0] = ir3_MOV(b, src[0], TYPE_F32);
7424cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
7438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_f2b:
7448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0);
7458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_NE;
7468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
7478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_b2f:
7498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32);
7508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_b2i:
7528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_b2n(b, src[0]);
7538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_i2b:
7558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
7568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_NE;
7578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
7588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
7608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fneg:
7618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FNEG);
7628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fabs:
7648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FABS);
7658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fmax:
7678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MAX_F(b, src[0], 0, src[1], 0);
7688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fmin:
7708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0);
7718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fmul:
7738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0);
7748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fadd:
7768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ADD_F(b, src[0], 0, src[1], 0);
7778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fsub:
7798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ADD_F(b, src[0], 0, src[1], IR3_REG_FNEG);
7808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ffma:
7828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MAD_F32(b, src[0], 0, src[1], 0, src[2], 0);
7838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fddx:
7858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_DSX(b, src[0], 0);
7868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat5.type = TYPE_F32;
7878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fddy:
7898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_DSY(b, src[0], 0);
7908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat5.type = TYPE_F32;
7918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_flt:
7948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
7958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_LT;
7968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
7978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
7988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fge:
7998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
8008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_GE;
8018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
8028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_feq:
8048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
8058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_EQ;
8068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
8078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fne:
8098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
8108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_NE;
8118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
8128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fceil:
8148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CEIL_F(b, src[0], 0);
8158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ffloor:
8178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_FLOOR_F(b, src[0], 0);
8188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ftrunc:
8208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_TRUNC_F(b, src[0], 0);
8218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fround_even:
8238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_RNDNE_F(b, src[0], 0);
8248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fsign:
8268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SIGN_F(b, src[0], 0);
8278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
8298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fsin:
8308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SIN(b, src[0], 0);
8318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fcos:
8338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_COS(b, src[0], 0);
8348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_frsq:
8368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_RSQ(b, src[0], 0);
8378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_frcp:
8398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_RCP(b, src[0], 0);
8408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_flog2:
8428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_LOG2(b, src[0], 0);
8438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fexp2:
8458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_EXP2(b, src[0], 0);
8468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_fsqrt:
8488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SQRT(b, src[0], 0);
8498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
8518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_iabs:
8528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SABS);
8538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_iadd:
8558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ADD_U(b, src[0], 0, src[1], 0);
8568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_iand:
8588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_AND_B(b, src[0], 0, src[1], 0);
8598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_imax:
8618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0);
8628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8631ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark	case nir_op_umax:
8641ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark		dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0);
8651ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark		break;
8668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_imin:
8678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0);
8688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8691ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark	case nir_op_umin:
8701ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark		dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0);
8711ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark		break;
8728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_imul:
8738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/*
8748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16)
8758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *   mull.u tmp0, a, b           ; mul low, i.e. al * bl
8768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *   madsh.m16 tmp1, a, b, tmp0  ; mul-add shift high mix, i.e. ah * bl << 16
8778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 *   madsh.m16 dst, b, a, tmp1   ; i.e. al * bh << 16
8788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 */
8798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0,
8808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark					ir3_MADSH_M16(b, src[0], 0, src[1], 0,
8818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark						ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0);
8828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ineg:
8848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG);
8858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_inot:
8878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_NOT_B(b, src[0], 0);
8888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ior:
8908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_OR_B(b, src[0], 0, src[1], 0);
8918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ishl:
8938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SHL_B(b, src[0], 0, src[1], 0);
8948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ishr:
8968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_ASHR_B(b, src[0], 0, src[1], 0);
8978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
8988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_isign: {
8998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* maybe this would be sane to lower in nir.. */
9008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		struct ir3_instruction *neg, *pos;
9018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
9028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		neg = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
9038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		neg->cat2.condition = IR3_COND_LT;
9048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
9058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		pos = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
9068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		pos->cat2.condition = IR3_COND_GT;
9078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
9088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SUB_U(b, pos, 0, neg, 0);
9098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
9108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
9128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_isub:
9138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SUB_U(b, src[0], 0, src[1], 0);
9148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ixor:
9168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_XOR_B(b, src[0], 0, src[1], 0);
9178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ushr:
9198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SHR_B(b, src[0], 0, src[1], 0);
9208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ilt:
9228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
9238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_LT;
9248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
9258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ige:
9278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
9288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_GE;
9298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
9308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ieq:
9328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
9338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_EQ;
9348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
9358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ine:
9378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
9388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_NE;
9398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
9408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_ult:
9428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0);
9438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_LT;
9448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
9458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_uge:
9478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0);
9488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0]->cat2.condition = IR3_COND_GE;
9498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_n2b(b, dst[0]);
9508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
9528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_op_bcsel:
9538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0);
9548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
956754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin	case nir_op_bit_count:
957754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_CBITS_B(b, src[0], 0);
958754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		break;
959754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin	case nir_op_ifind_msb: {
960754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		struct ir3_instruction *cmp;
961754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_CLZ_S(b, src[0], 0);
962754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0);
963754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		cmp->cat2.condition = IR3_COND_GE;
964754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_SEL_B32(b,
965754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin				ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
966754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin				cmp, 0, dst[0], 0);
967754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		break;
968754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin	}
969754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin	case nir_op_ufind_msb:
970754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_CLZ_B(b, src[0], 0);
971754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_SEL_B32(b,
972754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin				ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
973754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin				src[0], 0, dst[0], 0);
974754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		break;
975754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin	case nir_op_find_lsb:
976754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_BFREV_B(b, src[0], 0);
977754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_CLZ_B(b, dst[0], 0);
978754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		break;
979754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin	case nir_op_bitfield_reverse:
980754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		dst[0] = ir3_BFREV_B(b, src[0], 0);
981754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin		break;
982754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1aIlia Mirkin
9838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	default:
9848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		compile_error(ctx, "Unhandled ALU op: %s\n",
9858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				nir_op_infos[alu->op].name);
9868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
9878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
9888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
9898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
99057f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark/* handles direct/indirect UBO reads: */
99157f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clarkstatic void
99257f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clarkemit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
99357f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		struct ir3_instruction **dst)
99457f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark{
99557f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	struct ir3_block *b = ctx->block;
9966d77ceb701b889e76c93092b845986fc53e89257Rob Clark	struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1;
99778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand	nir_const_value *const_offset;
99857f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	/* UBO addresses are the first driver params: */
999fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark	unsigned ubo = regid(ctx->so->constbase.ubo, 0);
10006d77ceb701b889e76c93092b845986fc53e89257Rob Clark	const unsigned ptrsz = pointer_size(ctx);
10016d77ceb701b889e76c93092b845986fc53e89257Rob Clark
10028b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark	int off = 0;
100357f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark
100457f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	/* First src is ubo index, which could either be an immed or not: */
100557f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	src0 = get_src(ctx, &intr->src[0])[0];
100657f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	if (is_same_type_mov(src0) &&
100757f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark			(src0->regs[1]->flags & IR3_REG_IMMED)) {
10086d77ceb701b889e76c93092b845986fc53e89257Rob Clark		base_lo = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz));
10096d77ceb701b889e76c93092b845986fc53e89257Rob Clark		base_hi = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz) + 1);
101057f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	} else {
10116d77ceb701b889e76c93092b845986fc53e89257Rob Clark		base_lo = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0));
10126d77ceb701b889e76c93092b845986fc53e89257Rob Clark		base_hi = create_uniform_indirect(ctx, ubo + 1, get_addr(ctx, src0));
101357f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	}
101457f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark
10156d77ceb701b889e76c93092b845986fc53e89257Rob Clark	/* note: on 32bit gpu's base_hi is ignored and DCE'd */
10166d77ceb701b889e76c93092b845986fc53e89257Rob Clark	addr = base_lo;
10176d77ceb701b889e76c93092b845986fc53e89257Rob Clark
101878b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand	const_offset = nir_src_as_const_value(intr->src[1]);
101978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand	if (const_offset) {
1020084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga		off += const_offset->u32[0];
102178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand	} else {
102257f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		/* For load_ubo_indirect, second src is indirect offset: */
102357f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		src1 = get_src(ctx, &intr->src[1])[0];
102457f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark
102557f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		/* and add offset to addr: */
102657f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		addr = ir3_ADD_S(b, addr, 0, src1, 0);
102757f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	}
102857f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark
102957f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	/* if offset is to large to encode in the ldg, split it out: */
103057f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	if ((off + (intr->num_components * 4)) > 1024) {
103157f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		/* split out the minimal amount to improve the odds that
103257f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		 * cp can fit the immediate in the add.s instruction:
103357f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		 */
103457f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		unsigned off2 = off + (intr->num_components * 4) - 1024;
103557f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0);
103657f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		off -= off2;
103757f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	}
103857f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark
10396d77ceb701b889e76c93092b845986fc53e89257Rob Clark	if (ptrsz == 2) {
10406d77ceb701b889e76c93092b845986fc53e89257Rob Clark		struct ir3_instruction *carry;
10416d77ceb701b889e76c93092b845986fc53e89257Rob Clark
10426d77ceb701b889e76c93092b845986fc53e89257Rob Clark		/* handle 32b rollover, ie:
10436d77ceb701b889e76c93092b845986fc53e89257Rob Clark		 *   if (addr < base_lo)
10446d77ceb701b889e76c93092b845986fc53e89257Rob Clark		 *      base_hi++
10456d77ceb701b889e76c93092b845986fc53e89257Rob Clark		 */
10466d77ceb701b889e76c93092b845986fc53e89257Rob Clark		carry = ir3_CMPS_U(b, addr, 0, base_lo, 0);
10476d77ceb701b889e76c93092b845986fc53e89257Rob Clark		carry->cat2.condition = IR3_COND_LT;
10486d77ceb701b889e76c93092b845986fc53e89257Rob Clark		base_hi = ir3_ADD_S(b, base_hi, 0, carry, 0);
10496d77ceb701b889e76c93092b845986fc53e89257Rob Clark
10506d77ceb701b889e76c93092b845986fc53e89257Rob Clark		addr = create_collect(b, (struct ir3_instruction*[]){ addr, base_hi }, 2);
10516d77ceb701b889e76c93092b845986fc53e89257Rob Clark	}
10526d77ceb701b889e76c93092b845986fc53e89257Rob Clark
105357f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	for (int i = 0; i < intr->num_components; i++) {
105457f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		struct ir3_instruction *load =
105557f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark				ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
105657f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		load->cat6.type = TYPE_U32;
1057bc5e2bec303acd7fd962996bf369be5ce0e15cd2Rob Clark		load->cat6.src_offset = off + i * 4;     /* byte offset */
105857f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		dst[i] = load;
105957f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	}
106057f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark}
106157f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark
10624cf4006674bd7c507688316e2033d77066c45c90Rob Clark/* handles array reads: */
10634cf4006674bd7c507688316e2033d77066c45c90Rob Clarkstatic void
1064843cec6d3a5cd2ef0986ddcaa0960895b99f066bRob Clarkemit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
10654cf4006674bd7c507688316e2033d77066c45c90Rob Clark		struct ir3_instruction **dst)
10664cf4006674bd7c507688316e2033d77066c45c90Rob Clark{
10674cf4006674bd7c507688316e2033d77066c45c90Rob Clark	nir_deref_var *dvar = intr->variables[0];
10684cf4006674bd7c507688316e2033d77066c45c90Rob Clark	nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
1069fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	struct ir3_array *arr = get_var(ctx, dvar->var);
10704cf4006674bd7c507688316e2033d77066c45c90Rob Clark
10714cf4006674bd7c507688316e2033d77066c45c90Rob Clark	compile_assert(ctx, dvar->deref.child &&
10724cf4006674bd7c507688316e2033d77066c45c90Rob Clark		(dvar->deref.child->deref_type == nir_deref_type_array));
10734cf4006674bd7c507688316e2033d77066c45c90Rob Clark
10744cf4006674bd7c507688316e2033d77066c45c90Rob Clark	switch (darr->deref_array_type) {
10754cf4006674bd7c507688316e2033d77066c45c90Rob Clark	case nir_deref_array_type_direct:
10764cf4006674bd7c507688316e2033d77066c45c90Rob Clark		/* direct access does not require anything special: */
10774cf4006674bd7c507688316e2033d77066c45c90Rob Clark		for (int i = 0; i < intr->num_components; i++) {
10784cf4006674bd7c507688316e2033d77066c45c90Rob Clark			unsigned n = darr->base_offset * 4 + i;
10794cf4006674bd7c507688316e2033d77066c45c90Rob Clark			compile_assert(ctx, n < arr->length);
1080fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark			dst[i] = create_var_load(ctx, arr, n, NULL);
10814cf4006674bd7c507688316e2033d77066c45c90Rob Clark		}
10824cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
10834cf4006674bd7c507688316e2033d77066c45c90Rob Clark	case nir_deref_array_type_indirect: {
10844cf4006674bd7c507688316e2033d77066c45c90Rob Clark		/* for indirect, we need to collect all the array elements: */
10854cf4006674bd7c507688316e2033d77066c45c90Rob Clark		struct ir3_instruction *addr =
10864cf4006674bd7c507688316e2033d77066c45c90Rob Clark				get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
10874cf4006674bd7c507688316e2033d77066c45c90Rob Clark		for (int i = 0; i < intr->num_components; i++) {
10884cf4006674bd7c507688316e2033d77066c45c90Rob Clark			unsigned n = darr->base_offset * 4 + i;
10894cf4006674bd7c507688316e2033d77066c45c90Rob Clark			compile_assert(ctx, n < arr->length);
1090fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark			dst[i] = create_var_load(ctx, arr, n, addr);
10914cf4006674bd7c507688316e2033d77066c45c90Rob Clark		}
10924cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
10934cf4006674bd7c507688316e2033d77066c45c90Rob Clark	}
10944cf4006674bd7c507688316e2033d77066c45c90Rob Clark	default:
10954cf4006674bd7c507688316e2033d77066c45c90Rob Clark		compile_error(ctx, "Unhandled load deref type: %u\n",
10964cf4006674bd7c507688316e2033d77066c45c90Rob Clark				darr->deref_array_type);
10974cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
10984cf4006674bd7c507688316e2033d77066c45c90Rob Clark	}
10994cf4006674bd7c507688316e2033d77066c45c90Rob Clark}
11004cf4006674bd7c507688316e2033d77066c45c90Rob Clark
11014cf4006674bd7c507688316e2033d77066c45c90Rob Clark/* handles array writes: */
11024cf4006674bd7c507688316e2033d77066c45c90Rob Clarkstatic void
1103843cec6d3a5cd2ef0986ddcaa0960895b99f066bRob Clarkemit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
11044cf4006674bd7c507688316e2033d77066c45c90Rob Clark{
11054cf4006674bd7c507688316e2033d77066c45c90Rob Clark	nir_deref_var *dvar = intr->variables[0];
11064cf4006674bd7c507688316e2033d77066c45c90Rob Clark	nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
1107fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	struct ir3_array *arr = get_var(ctx, dvar->var);
110878ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction *addr;
110978ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction * const *src;
11108b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark	unsigned wrmask = nir_intrinsic_write_mask(intr);
11114cf4006674bd7c507688316e2033d77066c45c90Rob Clark
11124cf4006674bd7c507688316e2033d77066c45c90Rob Clark	compile_assert(ctx, dvar->deref.child &&
11134cf4006674bd7c507688316e2033d77066c45c90Rob Clark		(dvar->deref.child->deref_type == nir_deref_type_array));
11144cf4006674bd7c507688316e2033d77066c45c90Rob Clark
11154cf4006674bd7c507688316e2033d77066c45c90Rob Clark	src = get_src(ctx, &intr->src[0]);
11164cf4006674bd7c507688316e2033d77066c45c90Rob Clark
11174cf4006674bd7c507688316e2033d77066c45c90Rob Clark	switch (darr->deref_array_type) {
11184cf4006674bd7c507688316e2033d77066c45c90Rob Clark	case nir_deref_array_type_direct:
1119fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		addr = NULL;
11204cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
1121fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	case nir_deref_array_type_indirect:
1122fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
11234cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
11244cf4006674bd7c507688316e2033d77066c45c90Rob Clark	default:
11254cf4006674bd7c507688316e2033d77066c45c90Rob Clark		compile_error(ctx, "Unhandled store deref type: %u\n",
11264cf4006674bd7c507688316e2033d77066c45c90Rob Clark				darr->deref_array_type);
11273ca034cada87aea58a92113cb38cf92a97d70c55Rob Clark		return;
11284cf4006674bd7c507688316e2033d77066c45c90Rob Clark	}
1129fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark
1130fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	for (int i = 0; i < intr->num_components; i++) {
1131fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		if (!(wrmask & (1 << i)))
1132fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark			continue;
1133fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		unsigned n = darr->base_offset * 4 + i;
1134fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		compile_assert(ctx, n < arr->length);
1135fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark		create_var_store(ctx, arr, n, src[i], addr);
1136fad158a0e01f4c28851477e6d1eb5c8fd67e226bRob Clark	}
11374cf4006674bd7c507688316e2033d77066c45c90Rob Clark}
11384cf4006674bd7c507688316e2033d77066c45c90Rob Clark
1139c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clarkstatic void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
11401b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		struct ir3_instruction *instr)
11411b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark{
11421b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	struct ir3_shader_variant *so = ctx->so;
11431b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	unsigned r = regid(so->inputs_count, 0);
11441b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	unsigned n = so->inputs_count++;
11451b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark
1146c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	so->inputs[n].sysval = true;
1147c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	so->inputs[n].slot = slot;
11481b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	so->inputs[n].compmask = 1;
11491b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	so->inputs[n].regid = r;
1150ac1181ffbef5250cb3b651e047cce5116727c34cKenneth Graunke	so->inputs[n].interpolate = INTERP_MODE_FLAT;
11511b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	so->total_in++;
11521b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark
1153c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
1154c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ctx->ir->inputs[r] = instr;
11551b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark}
11561b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark
11578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
1158843cec6d3a5cd2ef0986ddcaa0960895b99f066bRob Clarkemit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
11598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
11608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
116178ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction **dst;
116278ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction * const *src;
11638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_block *b = ctx->block;
116478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand	nir_const_value *const_offset;
11658b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark	int idx;
11668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
11678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (info->has_dest) {
11688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst = get_dst(ctx, &intr->dest, intr->num_components);
11697674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark	} else {
11707674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark		dst = NULL;
11718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
11728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
11738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	switch (intr->intrinsic) {
11748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_intrinsic_load_uniform:
11758b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark		idx = nir_intrinsic_base(intr);
117678b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		const_offset = nir_src_as_const_value(intr->src[0]);
117778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		if (const_offset) {
1178084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga			idx += const_offset->u32[0];
117978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			for (int i = 0; i < intr->num_components; i++) {
118078b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				unsigned n = idx * 4 + i;
118178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				dst[i] = create_uniform(ctx, n);
118278b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			}
118378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		} else {
118478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			src = get_src(ctx, &intr->src[0]);
118578b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			for (int i = 0; i < intr->num_components; i++) {
11866a33c5c0dffce136bdc95daa2db2d3e9d3c1741fRob Clark				int n = idx * 4 + i;
118778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				dst[i] = create_uniform_indirect(ctx, n,
118878b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand						get_addr(ctx, src[0]));
118978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			}
119078b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			/* NOTE: if relative addressing is used, we set
119178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			 * constlen in the compiler (to worst-case value)
119278b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			 * since we don't know in the assembler what the max
119378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			 * addr reg value can be:
119478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			 */
119578b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			ctx->so->constlen = ctx->s->num_uniforms;
11968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
11978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
119857f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark	case nir_intrinsic_load_ubo:
119957f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		emit_intrinsic_load_ubo(ctx, intr, dst);
120057f0d3b3c6ae3b9f79a03517410b8dbfab0382c6Rob Clark		break;
12018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_intrinsic_load_input:
12028b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark		idx = nir_intrinsic_base(intr);
120378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		const_offset = nir_src_as_const_value(intr->src[0]);
120478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		if (const_offset) {
1205084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga			idx += const_offset->u32[0];
120678b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			for (int i = 0; i < intr->num_components; i++) {
120778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				unsigned n = idx * 4 + i;
120878b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				dst[i] = ctx->ir->inputs[n];
120978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			}
121078b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		} else {
121178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			src = get_src(ctx, &intr->src[0]);
121278b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			struct ir3_instruction *collect =
121378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand					create_collect(b, ctx->ir->inputs, ctx->ir->ninputs);
121478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			struct ir3_instruction *addr = get_addr(ctx, src[0]);
121578b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			for (int i = 0; i < intr->num_components; i++) {
121678b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				unsigned n = idx * 4 + i;
121778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand				dst[i] = create_indirect_load(ctx, ctx->ir->ninputs,
121878b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand						n, addr, collect);
121978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand			}
12208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
12218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
12224cf4006674bd7c507688316e2033d77066c45c90Rob Clark	case nir_intrinsic_load_var:
1223843cec6d3a5cd2ef0986ddcaa0960895b99f066bRob Clark		emit_intrinsic_load_var(ctx, intr, dst);
12244cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
12254cf4006674bd7c507688316e2033d77066c45c90Rob Clark	case nir_intrinsic_store_var:
1226843cec6d3a5cd2ef0986ddcaa0960895b99f066bRob Clark		emit_intrinsic_store_var(ctx, intr);
12274cf4006674bd7c507688316e2033d77066c45c90Rob Clark		break;
12288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_intrinsic_store_output:
12298b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark		idx = nir_intrinsic_base(intr);
123078b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		const_offset = nir_src_as_const_value(intr->src[1]);
123178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand		compile_assert(ctx, const_offset != NULL);
1232084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga		idx += const_offset->u32[0];
123378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand
12348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		src = get_src(ctx, &intr->src[0]);
12358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		for (int i = 0; i < intr->num_components; i++) {
12368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			unsigned n = idx * 4 + i;
1237c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark			ctx->ir->outputs[n] = src[i];
12388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
12398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
12401b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	case nir_intrinsic_load_base_vertex:
12411b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		if (!ctx->basevertex) {
1242a240748de52f2e469e91b60d29ae872828a594d7Rob Clark			ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
1243c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			add_sysval_input(ctx, SYSTEM_VALUE_BASE_VERTEX,
12441b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark					ctx->basevertex);
12451b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		}
12461b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		dst[0] = ctx->basevertex;
12471b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		break;
12481b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	case nir_intrinsic_load_vertex_id_zero_base:
1249b48fde15767284814b3df6e9d7946ca7f5eccbfbRob Clark	case nir_intrinsic_load_vertex_id:
12501b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		if (!ctx->vertex_id) {
1251b48fde15767284814b3df6e9d7946ca7f5eccbfbRob Clark			gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ?
1252b48fde15767284814b3df6e9d7946ca7f5eccbfbRob Clark				SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
1253f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark			ctx->vertex_id = create_input(b, 0);
1254b48fde15767284814b3df6e9d7946ca7f5eccbfbRob Clark			add_sysval_input(ctx, sv, ctx->vertex_id);
12551b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		}
12561b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		dst[0] = ctx->vertex_id;
12571b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		break;
12581b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark	case nir_intrinsic_load_instance_id:
12591b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		if (!ctx->instance_id) {
1260f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark			ctx->instance_id = create_input(b, 0);
1261c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
12621b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark					ctx->instance_id);
12631b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		}
12641b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		dst[0] = ctx->instance_id;
12651b936bb9f8da72baaef5c7454e8bebb63bbe067aRob Clark		break;
126691ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark	case nir_intrinsic_load_user_clip_plane:
12678b0fb1c152fe191768953aa8c77b89034a377f83Rob Clark		idx = nir_intrinsic_ucp_id(intr);
126891ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark		for (int i = 0; i < intr->num_components; i++) {
126991ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark			unsigned n = idx * 4 + i;
127091ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark			dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
127191ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark		}
127291ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark		break;
1273f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark	case nir_intrinsic_load_front_face:
1274f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark		if (!ctx->frag_face) {
1275f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark			ctx->so->frag_face = true;
1276f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark			ctx->frag_face = create_input(b, 0);
1277f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark			ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
1278f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark		}
1279b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		/* for fragface, we always get -1 or 0, but that is inverse
1280b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		 * of what nir expects (where ~0 is true).  Unfortunately
1281b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		 * trying to widen from half to full in add.s seems to do a
1282b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		 * non-sign-extending widen (resulting in something that
1283b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		 * gets interpreted as float Inf??)
1284b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		 */
1285b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32);
1286b4c72b792caecd8be271af20de92d24b4ae7da4cRob Clark		dst[0] = ir3_ADD_S(b, dst[0], 0, create_immed(b, 1), 0);
1287f212d7dc50a6a81e65f146d0ccef5776ec5e497fRob Clark		break;
12888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_intrinsic_discard_if:
12898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_intrinsic_discard: {
12908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		struct ir3_instruction *cond, *kill;
12918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
12928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		if (intr->intrinsic == nir_intrinsic_discard_if) {
12938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			/* conditional discard: */
12948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			src = get_src(ctx, &intr->src[0]);
12958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			cond = ir3_b2n(b, src[0]);
12968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		} else {
12978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			/* unconditional discard: */
12988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			cond = create_immed(b, 1);
12998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
13008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1301457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		/* NOTE: only cmps.*.* can write p0.x: */
13028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0);
13038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		cond->cat2.condition = IR3_COND_NE;
13048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* condition always goes in predicate register: */
13068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		cond->regs[0]->num = regid(REG_P0, 0);
13078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		kill = ir3_KILL(b, cond, 0);
13097273cb4e933f8be65fc73b9d8c69c76d1078cb14Rob Clark		array_insert(ctx->ir->predicates, kill);
13108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
131196d4db683f90f02e72d34ece544de7eedfa873eeRob Clark		array_insert(ctx->ir->keeps, kill);
13128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->so->has_kill = true;
13138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
13158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
13168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	default:
13178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		compile_error(ctx, "Unhandled intrinsic type: %s\n",
13188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				nir_intrinsic_infos[intr->intrinsic].name);
13198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
13208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
13218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
13228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
13248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkemit_load_const(struct ir3_compile *ctx, nir_load_const_instr *instr)
13258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
13268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def,
13278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			instr->def.num_components);
13288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (int i = 0; i < instr->def.num_components; i++)
1329084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga		dst[i] = create_immed(ctx->block, instr->value.u32[i]);
13308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
13318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
13338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkemit_undef(struct ir3_compile *ctx, nir_ssa_undef_instr *undef)
13348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
13358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction **dst = get_dst_ssa(ctx, &undef->def,
13368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			undef->def.num_components);
13378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* backend doesn't want undefined instructions, so just plug
13388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * in 0.0..
13398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
13408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (int i = 0; i < undef->def.num_components; i++)
13418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		dst[i] = create_immed(ctx->block, fui(0.0));
13428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
13438b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/*
13458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * texture fetch/sample instructions:
13468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
13478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
13488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
134997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clarktex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
135097e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark{
135197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	unsigned coords, flags = 0;
135297e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
135397e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	/* note: would use tex->coord_components.. except txs.. also,
135497e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	 * since array index goes after shadow ref, we don't want to
135597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	 * count it:
135697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	 */
135797e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	switch (tex->sampler_dim) {
135897e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_1D:
135997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_BUF:
136097e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		coords = 1;
136197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		break;
136297e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_2D:
136397e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_RECT:
136497e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_EXTERNAL:
136597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_MS:
136697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		coords = 2;
136797e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		break;
136897e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_3D:
136997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	case GLSL_SAMPLER_DIM_CUBE:
137097e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		coords = 3;
137197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		flags |= IR3_INSTR_3D;
137297e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		break;
13737674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark	default:
13747674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark		unreachable("bad sampler_dim");
137597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	}
137697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
1377190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	if (tex->is_shadow && tex->op != nir_texop_lod)
137897e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		flags |= IR3_INSTR_S;
137997e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
1380190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	if (tex->is_array && tex->op != nir_texop_lod)
138197e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark		flags |= IR3_INSTR_A;
138297e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
138397e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	*flagsp = flags;
138497e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	*coordsp = coords;
138597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark}
138697e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark
138797e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clarkstatic void
13888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkemit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
13898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
13908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_block *b = ctx->block;
13916e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark	struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
139278ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction * const *coord, * const *off, * const *ddx, * const *ddy;
139378ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction *lod, *compare, *proj;
13948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
139597e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1aRob Clark	unsigned i, coords, flags;
13966e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark	unsigned nsrc0 = 0, nsrc1 = 0;
13976e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark	type_t type;
13987674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark	opc_t opc = 0;
13997674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark
14007674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark	coord = off = ddx = ddy = NULL;
14017674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark	lod = proj = compare = NULL;
14028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
14038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* TODO: might just be one component for gathers? */
14048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	dst = get_dst(ctx, &tex->dest, 4);
14058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
14068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (unsigned i = 0; i < tex->num_srcs; i++) {
14078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		switch (tex->src[i].src_type) {
14088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_coord:
14098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			coord = get_src(ctx, &tex->src[i].src);
14108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_bias:
14128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			lod = get_src(ctx, &tex->src[i].src)[0];
14138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			has_bias = true;
14148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_lod:
14168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			lod = get_src(ctx, &tex->src[i].src)[0];
14178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			has_lod = true;
14188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
1419fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin		case nir_tex_src_comparator: /* shadow comparator */
14208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			compare = get_src(ctx, &tex->src[i].src)[0];
14218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_projector:
14238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			proj = get_src(ctx, &tex->src[i].src)[0];
14248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			has_proj = true;
14258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_offset:
14278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			off = get_src(ctx, &tex->src[i].src);
14288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			has_off = true;
14298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_ddx:
14318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			ddx = get_src(ctx, &tex->src[i].src);
14328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_tex_src_ddy:
14348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			ddy = get_src(ctx, &tex->src[i].src);
14358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
14368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		default:
1437f15447e7c9dc1e00973b02098637da0aa74de7d5Rob Clark			compile_error(ctx, "Unhandled NIR tex src type: %d\n",
14388b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark					tex->src[i].src_type);
14398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			return;
14408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
14418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
14428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
144395e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	switch (tex->op) {
144495e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_tex:      opc = OPC_SAM;      break;
144595e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_txb:      opc = OPC_SAMB;     break;
144695e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_txl:      opc = OPC_SAML;     break;
144795e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_txd:      opc = OPC_SAMGQ;    break;
144895e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_txf:      opc = OPC_ISAML;    break;
1449190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	case nir_texop_lod:      opc = OPC_GETLOD;   break;
145095e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_txf_ms:
145195e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_txs:
145295e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_tg4:
145395e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	case nir_texop_query_levels:
145459519c2283e91ce4b8c2028673d6d8dc4ee5138fRob Clark	case nir_texop_texture_samples:
1455457bb290efc162ea3c7c51a820ab7cf88a4efb8dIan Romanick	case nir_texop_samples_identical:
1456b65bd3dee5d84f4bd7806518282299960d426dc1Rob Clark	case nir_texop_txf_ms_mcs:
145795e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark		compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
145895e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark		return;
145995e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	}
146095e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark
146195e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark	tex_info(tex, &flags, &coords);
146295e68adcd9f2589ae6d998328c72b84ffc49edc7Rob Clark
14638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/*
14648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * lay out the first argument in the proper order:
14658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - actual coordinates first
14668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - shadow reference
14678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - array index
14688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - projection w
14698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - starting at offset 4, dpdx.xy, dpdy.xy
14708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *
14718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * bias/lod go into the second arg
14728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
14738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
14748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* insert tex coords: */
14758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (i = 0; i < coords; i++)
147678ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		src0[i] = coord[i];
147778ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark
147878ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	nsrc0 = i;
147978ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark
148078ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	/* scale up integer coords for TXF based on the LOD */
148178ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	if (ctx->unminify_coords && (opc == OPC_ISAML)) {
148278ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		assert(has_lod);
148378ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		for (i = 0; i < coords; i++)
148478ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark			src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0);
148578ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	}
14868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
14878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (coords == 1) {
14888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* hw doesn't do 1d, so we treat it as 2d with
14898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * height of 1, and patch up the y coord.
14908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * TODO: y coord should be (int)0 in some cases..
14918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 */
14926e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		src0[nsrc0++] = create_immed(b, fui(0.5));
14938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
14948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1495190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	if (tex->is_shadow && tex->op != nir_texop_lod)
14966e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		src0[nsrc0++] = compare;
14978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
149878ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	if (tex->is_array && tex->op != nir_texop_lod) {
149978ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		struct ir3_instruction *idx = coord[coords];
150078ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark
150178ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		/* the array coord for cube arrays needs 0.5 added to it */
150278ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		if (ctx->array_index_add_half && (opc != OPC_ISAML))
150378ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark			idx = ir3_ADD_F(b, idx, 0, create_immed(b, fui(0.5)), 0);
150478ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark
150578ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark		src0[nsrc0++] = idx;
150678ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	}
15078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
15088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (has_proj) {
15096e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		src0[nsrc0++] = proj;
15108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		flags |= IR3_INSTR_P;
15118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
15128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
15138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* pad to 4, then ddx/ddy: */
15148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (tex->op == nir_texop_txd) {
15156e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		while (nsrc0 < 4)
15166e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			src0[nsrc0++] = create_immed(b, fui(0.0));
15176e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		for (i = 0; i < coords; i++)
15186e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			src0[nsrc0++] = ddx[i];
15196e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		if (coords < 2)
15206e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			src0[nsrc0++] = create_immed(b, fui(0.0));
15216e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		for (i = 0; i < coords; i++)
15226e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			src0[nsrc0++] = ddy[i];
15236e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		if (coords < 2)
15246e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			src0[nsrc0++] = create_immed(b, fui(0.0));
15258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
15268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
15278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/*
15288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * second argument (if applicable):
15298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - offsets
15308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - lod
15318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 *  - bias
15328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
15338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (has_off | has_lod | has_bias) {
15348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		if (has_off) {
15356e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			for (i = 0; i < coords; i++)
15366e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark				src1[nsrc1++] = off[i];
15376e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			if (coords < 2)
15386e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark				src1[nsrc1++] = create_immed(b, fui(0.0));
15398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			flags |= IR3_INSTR_O;
15408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
15418b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
15426e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		if (has_lod | has_bias)
15436e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark			src1[nsrc1++] = lod;
15448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
15458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
15468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	switch (tex->dest_type) {
15478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_type_invalid:
15488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_type_float:
15496e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		type = TYPE_F32;
15508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
15518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_type_int:
15526e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		type = TYPE_S32;
15538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
1554f58813842bcece3498f55ec5d582466ccff92a5eJason Ekstrand	case nir_type_uint:
15558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_type_bool:
15566e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		type = TYPE_U32;
15576e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark		break;
15587674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark	default:
15597674ab12e826d2ea33f13fb2e6ca8ae2a62fe460Rob Clark		unreachable("bad dest_type");
15608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
15618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1562190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	if (opc == OPC_GETLOD)
1563190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin		type = TYPE_U32;
1564190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin
1565adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	unsigned tex_idx = tex->texture_index;
1566adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1567adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx);
1568adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1569adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	struct ir3_instruction *col0 = create_collect(b, src0, nsrc0);
1570adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	struct ir3_instruction *col1 = create_collect(b, src1, nsrc1);
1571adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1572adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags,
1573adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			tex_idx, tex_idx, col0, col1);
1574adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1575adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) {
1576adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		/* only need first 3 components: */
1577adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		sam->regs[0]->wrmask = 0x7;
1578adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		split_dest(b, dst, sam, 0, 3);
15796e8160d6e3ea7b000de112538dcbb0e29a6c3838Rob Clark
1580adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		/* we need to sample the alpha separately with a non-ASTC
1581adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		 * texture state:
1582adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		 */
1583adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_W, flags,
1584adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark				tex_idx, tex_idx, col0, col1);
1585adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1586adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		array_insert(ctx->ir->astc_srgb, sam);
1587adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
1588adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		/* fixup .w component: */
1589adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		split_dest(b, &dst[3], sam, 3, 1);
1590adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	} else {
1591adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		/* normal (non-workaround) case: */
1592adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		split_dest(b, dst, sam, 0, 4);
1593adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	}
1594190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin
1595190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	/* GETLOD returns results in 4.8 fixed point */
1596190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	if (opc == OPC_GETLOD) {
1597190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin		struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256));
1598190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin
1599190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin		compile_assert(ctx, tex->dest_type == nir_type_float);
1600190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin		for (i = 0; i < 2; i++) {
1601190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin			dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0,
1602190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin							   factor, 0);
1603190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin		}
1604190acb34ca165c840f87a25149eab1d1b7dc85c4Ilia Mirkin	}
16058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
16068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1607715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clarkstatic void
1608715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clarkemit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex)
1609715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark{
1610715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	struct ir3_block *b = ctx->block;
1611715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	struct ir3_instruction **dst, *sam;
1612715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1613715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	dst = get_dst(ctx, &tex->dest, 1);
1614715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1615715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, TGSI_WRITEMASK_Z, 0,
1616ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand			tex->texture_index, tex->texture_index, NULL, NULL);
1617715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1618715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	/* even though there is only one component, since it ends
1619715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 * up in .z rather than .x, we need a split_dest()
1620715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 */
1621adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	split_dest(b, dst, sam, 0, 3);
1622715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1623715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	/* The # of levels comes from getinfo.z. We need to add 1 to it, since
1624715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 * the value in TEX_CONST_0 is zero-based.
1625715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 */
1626715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	if (ctx->levels_add_one)
1627715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0);
1628715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark}
1629715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1630715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clarkstatic void
1631715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clarkemit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex)
1632715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark{
1633715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	struct ir3_block *b = ctx->block;
163478ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction **dst, *sam;
163578ba262d004989c43b0a9e76c84b71bb16d4b333Rob Clark	struct ir3_instruction *lod;
1636715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	unsigned flags, coords;
1637715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1638715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	tex_info(tex, &flags, &coords);
1639715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1640b4ace13eeae7ec58262d8a3ec38adca63b6add76Ilia Mirkin	/* Actually we want the number of dimensions, not coordinates. This
1641b4ace13eeae7ec58262d8a3ec38adca63b6add76Ilia Mirkin	 * distinction only matters for cubes.
1642b4ace13eeae7ec58262d8a3ec38adca63b6add76Ilia Mirkin	 */
1643b4ace13eeae7ec58262d8a3ec38adca63b6add76Ilia Mirkin	if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
1644b4ace13eeae7ec58262d8a3ec38adca63b6add76Ilia Mirkin		coords = 2;
1645b4ace13eeae7ec58262d8a3ec38adca63b6add76Ilia Mirkin
1646715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	dst = get_dst(ctx, &tex->dest, 4);
1647715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1648715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	compile_assert(ctx, tex->num_srcs == 1);
1649715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod);
1650715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1651715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	lod = get_src(ctx, &tex->src[0].src)[0];
1652715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1653715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
1654ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand			tex->texture_index, tex->texture_index, lod, NULL);
1655715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1656adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	split_dest(b, dst, sam, 0, 4);
1657715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark
1658715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	/* Array size actually ends up in .w rather than .z. This doesn't
1659715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 * matter for miplevel 0, but for higher mips the value in z is
1660715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
1661715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 * returned, which means that we have to add 1 to it for arrays.
1662715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	 */
1663715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	if (tex->is_array) {
1664715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		if (ctx->levels_add_one) {
1665715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0);
1666715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		} else {
1667715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			dst[coords] = ir3_MOV(b, dst[3], TYPE_U32);
1668715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		}
1669715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	}
1670715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark}
16718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
16728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
1673457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi)
1674457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
1675457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_instruction *phi, **dst;
1676457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1677457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* NOTE: phi's should be lowered to scalar at this point */
1678457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	compile_assert(ctx, nphi->dest.ssa.num_components == 1);
1679457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1680457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	dst = get_dst(ctx, &nphi->dest, 1);
1681457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
168238ae05a340bdf526d5da62159223ad9938fea36aRob Clark	phi = ir3_instr_create2(ctx->block, OPC_META_PHI,
1683457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			1 + exec_list_length(&nphi->srcs));
1684457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ir3_reg_create(phi, 0, 0);         /* dst */
1685457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	phi->phi.nphi = nphi;
1686457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1687457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	dst[0] = phi;
1688457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
1689457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1690457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark/* phi instructions are left partially constructed.  We don't resolve
1691457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark * their srcs until the end of the block, since (eg. loops) one of
1692457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark * the phi's srcs might be defined after the phi due to back edges in
1693457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark * the CFG.
1694457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark */
1695457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void
1696457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkresolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
1697457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
1698457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
1699457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		nir_phi_instr *nphi;
1700457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1701457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		/* phi's only come at start of block: */
170219739e4fb9024f42a8fc332e6fa94c292bb6bc16Rob Clark		if (instr->opc != OPC_META_PHI)
1703457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			break;
1704457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1705457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		if (!instr->phi.nphi)
1706457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			break;
1707457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1708457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		nphi = instr->phi.nphi;
1709457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		instr->phi.nphi = NULL;
1710457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1711457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) {
1712457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0];
1713506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark
1714506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			/* NOTE: src might not be in the same block as it comes from
1715506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			 * according to the phi.. but in the end the backend assumes
1716506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			 * it will be able to assign the same register to each (which
1717506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			 * only works if it is assigned in the src block), so insert
1718506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			 * an extra mov to make sure the phi src is assigned in the
1719506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			 * block it comes from:
1720506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			 */
1721506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark			src = ir3_MOV(get_block(ctx, nsrc->pred), src, TYPE_U32);
1722506b561ba7e3df2a7759dded684fae84bf459f65Rob Clark
1723457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
1724457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		}
1725457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	}
1726457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
1727457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1728457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void
1729457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_jump(struct ir3_compile *ctx, nir_jump_instr *jump)
1730457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
1731457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	switch (jump->type) {
1732457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	case nir_jump_break:
1733457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	case nir_jump_continue:
1734457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		/* I *think* we can simply just ignore this, and use the
1735457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		 * successor block link to figure out where we need to
1736457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		 * jump to for break/continue
1737457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		 */
1738457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		break;
1739457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	default:
1740457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		compile_error(ctx, "Unhandled NIR jump type: %d\n", jump->type);
1741457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		break;
1742457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	}
1743457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
1744457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1745457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void
17468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkemit_instr(struct ir3_compile *ctx, nir_instr *instr)
17478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
17488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	switch (instr->type) {
17498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_instr_type_alu:
17508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		emit_alu(ctx, nir_instr_as_alu(instr));
17518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
17528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_instr_type_intrinsic:
1753843cec6d3a5cd2ef0986ddcaa0960895b99f066bRob Clark		emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
17548b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
17558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_instr_type_load_const:
17568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		emit_load_const(ctx, nir_instr_as_load_const(instr));
17578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
17588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_instr_type_ssa_undef:
17598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		emit_undef(ctx, nir_instr_as_ssa_undef(instr));
17608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
1761715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	case nir_instr_type_tex: {
1762715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		nir_tex_instr *tex = nir_instr_as_tex(instr);
1763715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		/* couple tex instructions get special-cased:
1764715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		 */
1765715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		switch (tex->op) {
1766715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		case nir_texop_txs:
1767715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			emit_tex_txs(ctx, tex);
1768715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			break;
1769715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		case nir_texop_query_levels:
1770715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			emit_tex_query_levels(ctx, tex);
1771715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			break;
1772715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		default:
1773715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			emit_tex(ctx, tex);
1774715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark			break;
1775715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark		}
17768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
1777715b2e0dbb88ef80880b8517f8fe822c26ef3be5Rob Clark	}
17788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_instr_type_phi:
1779457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		emit_phi(ctx, nir_instr_as_phi(instr));
1780457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		break;
1781457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	case nir_instr_type_jump:
1782457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		emit_jump(ctx, nir_instr_as_jump(instr));
1783457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		break;
1784457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	case nir_instr_type_call:
17858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	case nir_instr_type_parallel_copy:
17868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type);
17878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		break;
17888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
17898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
17908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1791457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic struct ir3_block *
1792457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkget_block(struct ir3_compile *ctx, nir_block *nblock)
1793457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
1794457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_block *block;
1795457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct hash_entry *entry;
1796457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	entry = _mesa_hash_table_search(ctx->block_ht, nblock);
1797457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	if (entry)
1798457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		return entry->data;
1799457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1800457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	block = ir3_block_create(ctx->ir);
1801457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	block->nblock = nblock;
1802457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	_mesa_hash_table_insert(ctx->block_ht, nblock, block);
1803457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1804457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	return block;
1805457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
1806457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
18078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
1808457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_block(struct ir3_compile *ctx, nir_block *nblock)
18098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
1810457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_block *block = get_block(ctx, nblock);
1811457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1812457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
1813457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		if (nblock->successors[i]) {
1814457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			block->successors[i] =
1815457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark				get_block(ctx, nblock->successors[i]);
1816457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		}
1817457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	}
1818457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1819457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ctx->block = block;
1820457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	list_addtail(&block->node, &ctx->ir->block_list);
1821457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1822ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark	/* re-emit addr register in each block if needed: */
1823ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark	_mesa_hash_table_destroy(ctx->addr_ht, NULL);
1824ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark	ctx->addr_ht = NULL;
1825ad2cc7bddc094639508e4942310dbd2896be7774Rob Clark
1826707e72f13bb78869ee95d3286980bf1709cba6cfJason Ekstrand	nir_foreach_instr(instr, nblock) {
18278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		emit_instr(ctx, instr);
18288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		if (ctx->error)
18298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			return;
18308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
18318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
18328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
1833457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void emit_cf_list(struct ir3_compile *ctx, struct exec_list *list);
1834457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
18358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
1836457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_if(struct ir3_compile *ctx, nir_if *nif)
1837457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
1838457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	struct ir3_instruction *condition = get_src(ctx, &nif->condition)[0];
1839457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1840457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ctx->block->condition =
1841457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		get_predicate(ctx, ir3_b2n(condition->block, condition));
1842457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1843457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	emit_cf_list(ctx, &nif->then_list);
1844457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	emit_cf_list(ctx, &nif->else_list);
1845457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
1846457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1847457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void
1848457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_loop(struct ir3_compile *ctx, nir_loop *nloop)
1849457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
1850457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	emit_cf_list(ctx, &nloop->body);
1851457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
1852457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1853457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void
1854457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_cf_list(struct ir3_compile *ctx, struct exec_list *list)
18558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
1856457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	foreach_list_typed(nir_cf_node, node, node, list) {
18578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		switch (node->type) {
18588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_cf_node_block:
18598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			emit_block(ctx, nir_cf_node_as_block(node));
18608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
18618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_cf_node_if:
1862457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			emit_if(ctx, nir_cf_node_as_if(node));
1863457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			break;
18648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_cf_node_loop:
1865457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			emit_loop(ctx, nir_cf_node_as_loop(node));
1866457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			break;
18678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		case nir_cf_node_function:
18688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			compile_error(ctx, "TODO\n");
18698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
18708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
18718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
18728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
18738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
187498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark/* emit stream-out code.  At this point, the current block is the original
187598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark * (nir) end block, and nir ensures that all flow control paths terminate
187698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark * into the end block.  We re-purpose the original end block to generate
187798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional
187898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark * block holding stream-out write instructions, followed by the new end
187998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark * block:
188098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *
188198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *   blockOrigEnd {
188298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *      p0.x = (vtxcnt < maxvtxcnt)
188398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *      // succs: blockStreamOut, blockNewEnd
188498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *   }
188598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *   blockStreamOut {
188698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *      ... stream-out instructions ...
188798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *      // succs: blockNewEnd
188898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *   }
188998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *   blockNewEnd {
189098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark *   }
189198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark */
189298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clarkstatic void
189398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clarkemit_stream_out(struct ir3_compile *ctx)
189498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark{
189598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	struct ir3_shader_variant *v = ctx->so;
189698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	struct ir3 *ir = ctx->ir;
189798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	struct pipe_stream_output_info *strmout =
189898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			&ctx->so->shader->stream_output;
189998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	struct ir3_block *orig_end_block, *stream_out_block, *new_end_block;
190098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond;
190198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	struct ir3_instruction *bases[PIPE_MAX_SO_BUFFERS];
190298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
190398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* create vtxcnt input in input block at top of shader,
190498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * so that it is seen as live over the entire duration
190598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * of the shader:
190698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 */
190798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	vtxcnt = create_input(ctx->in_block, 0);
1908c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt);
190998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
191098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
191198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
191298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* at this point, we are at the original 'end' block,
191398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * re-purpose this block to stream-out condition, then
191498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * append stream-out block and new-end block
191598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 */
191698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	orig_end_block = ctx->block;
191798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
191898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	stream_out_block = ir3_block_create(ir);
191998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	list_addtail(&stream_out_block->node, &ir->block_list);
192098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
192198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	new_end_block = ir3_block_create(ir);
192298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	list_addtail(&new_end_block->node, &ir->block_list);
192398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
192498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	orig_end_block->successors[0] = stream_out_block;
192598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	orig_end_block->successors[1] = new_end_block;
192698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	stream_out_block->successors[0] = new_end_block;
192798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
192898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* setup 'if (vtxcnt < maxvtxcnt)' condition: */
192998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
193098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	cond->regs[0]->num = regid(REG_P0, 0);
193198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	cond->cat2.condition = IR3_COND_LT;
193298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
193398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* condition goes on previous block to the conditional,
193498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * since it is used to pick which of the two successor
193598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * paths to take:
193698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 */
193798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	orig_end_block->condition = cond;
193898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
193998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* switch to stream_out_block to generate the stream-out
194098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * instructions:
194198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 */
194298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	ctx->block = stream_out_block;
194398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
194498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* Calculate base addresses based on vtxcnt.  Instructions
194598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * generated for bases not used in following loop will be
194698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * stripped out in the backend.
194798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 */
194898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
194998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		unsigned stride = strmout->stride[i];
195098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		struct ir3_instruction *base, *off;
195198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
1952fc10dc9fdea6ad7d04dfcdb8fd2e2d59ea67f68bRob Clark		base = create_uniform(ctx, regid(v->constbase.tfbo, i));
195398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
195498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		/* 24-bit should be enough: */
195598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		off = ir3_MUL_U(ctx->block, vtxcnt, 0,
195698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark				create_immed(ctx->block, stride * 4), 0);
195798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
195898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0);
195998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	}
196098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
196198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* Generate the per-output store instructions: */
196298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	for (unsigned i = 0; i < strmout->num_outputs; i++) {
196398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		for (unsigned j = 0; j < strmout->output[i].num_components; j++) {
196498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			unsigned c = j + strmout->output[i].start_component;
196598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			struct ir3_instruction *base, *out, *stg;
196698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
196798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			base = bases[strmout->output[i].output_buffer];
196898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)];
196998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
197098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			stg = ir3_STG(ctx->block, base, 0, out, 0,
197198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark					create_immed(ctx->block, 1), 0);
197298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			stg->cat6.type = TYPE_U32;
197398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
197498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
197598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			array_insert(ctx->ir->keeps, stg);
197698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		}
197798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	}
197898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
197998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* and finally switch to the new_end_block: */
198098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	ctx->block = new_end_block;
198198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark}
198298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
19838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
1984457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkemit_function(struct ir3_compile *ctx, nir_function_impl *impl)
1985457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark{
198623bd6affb24662e9e8dbe1ed353babd17b5a016dRob Clark	nir_metadata_require(impl, nir_metadata_block_index);
198723bd6affb24662e9e8dbe1ed353babd17b5a016dRob Clark
1988457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	emit_cf_list(ctx, &impl->body);
1989457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	emit_block(ctx, impl->end_block);
1990457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
1991457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* at this point, we should have a single empty block,
1992457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	 * into which we emit the 'end' instruction.
1993457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	 */
1994457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	compile_assert(ctx, list_empty(&ctx->block->instr_list));
199598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
199698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	/* If stream-out (aka transform-feedback) enabled, emit the
199798a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * stream-out instructions, followed by a new empty block (into
199898a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * which the 'end' instruction lands).
199998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 *
200098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * NOTE: it is done in this order, rather than inserting before
200198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * we emit end_block, because NIR guarantees that all blocks
200298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * flow into end_block, and that end_block has no successors.
200398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * So by re-purposing end_block as the first block of stream-
200498a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * out, we guarantee that all exit paths flow into the stream-
200598a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 * out instructions.
200698a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	 */
2007d10c5a24818b39585acfa60ceb6dbbae22b5be09Rob Clark	if ((ctx->compiler->gpu_id < 500) &&
2008d10c5a24818b39585acfa60ceb6dbbae22b5be09Rob Clark			(ctx->so->shader->stream_output.num_outputs > 0) &&
200998a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark			!ctx->so->key.binning_pass) {
201098a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		debug_assert(ctx->so->type == SHADER_VERTEX);
201198a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark		emit_stream_out(ctx);
201298a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark	}
201398a4b111fbb9e3ae45e907ddd4d2407e5ab669ecRob Clark
2014457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ir3_END(ctx->block);
2015457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark}
2016457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
2017457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clarkstatic void
20188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarksetup_input(struct ir3_compile *ctx, nir_variable *in)
20198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
20208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_shader_variant *so = ctx->so;
20218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned array_len = MAX2(glsl_get_length(in->type), 1);
20228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned ncomp = glsl_get_components(in->type);
20238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned n = in->data.driver_location;
20248fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	unsigned slot = in->data.location;
20258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
20268fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	DBG("; in: slot=%u, len=%ux%u, drvloc=%u",
20278fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			slot, array_len, ncomp, n);
20288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2029784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark	/* let's pretend things other than vec4 don't exist: */
2030784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark	ncomp = MAX2(ncomp, 4);
2031784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark	compile_assert(ctx, ncomp == 4);
2032784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark
2033c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	so->inputs[n].slot = slot;
20348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	so->inputs[n].compmask = (1 << ncomp) - 1;
20358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	so->inputs_count = MAX2(so->inputs_count, n + 1);
2036e523f69b1d2f0cb3ff7659e3c55b9a2e40240c9cRob Clark	so->inputs[n].interpolate = in->data.interpolation;
20378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
20388fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	if (ctx->so->type == SHADER_FRAGMENT) {
20398fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt		for (int i = 0; i < ncomp; i++) {
20408fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			struct ir3_instruction *instr = NULL;
20418fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			unsigned idx = (n * 4) + i;
20428b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2043c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			if (slot == VARYING_SLOT_POS) {
20448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				so->inputs[n].bary = false;
20458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				so->frag_coord = true;
20468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				instr = create_frag_coord(ctx, i);
204753cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark			} else if (slot == VARYING_SLOT_PNTC) {
204853cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				/* see for example st_get_generic_varying_index().. this is
204953cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 * maybe a bit mesa/st specific.  But we need things to line
205053cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 * up for this in fdN_program:
205153cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 *    unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
205253cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 *    if (emit->sprite_coord_enable & texmask) {
205353cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 *       ...
205453cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 *    }
205553cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				 */
205653cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				so->inputs[n].slot = VARYING_SLOT_VAR8;
205753cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				so->inputs[n].bary = true;
205853cde5e295077e2a51df3a3d0db474cff5c10313Rob Clark				instr = create_frag_input(ctx, false);
20598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			} else {
20608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				bool use_ldlv = false;
20618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2062e523f69b1d2f0cb3ff7659e3c55b9a2e40240c9cRob Clark				/* detect the special case for front/back colors where
2063e523f69b1d2f0cb3ff7659e3c55b9a2e40240c9cRob Clark				 * we need to do flat vs smooth shading depending on
2064e523f69b1d2f0cb3ff7659e3c55b9a2e40240c9cRob Clark				 * rast state:
20658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				 */
2066ac1181ffbef5250cb3b651e047cce5116727c34cKenneth Graunke				if (in->data.interpolation == INTERP_MODE_NONE) {
2067c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					switch (slot) {
2068c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					case VARYING_SLOT_COL0:
2069c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					case VARYING_SLOT_COL1:
2070c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					case VARYING_SLOT_BFC0:
2071c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					case VARYING_SLOT_BFC1:
2072c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark						so->inputs[n].rasterflat = true;
2073c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark						break;
2074c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					default:
2075c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark						break;
2076c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					}
2077c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark				}
20788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
20798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				if (ctx->flat_bypass) {
2080ac1181ffbef5250cb3b651e047cce5116727c34cKenneth Graunke					if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) ||
2081e523f69b1d2f0cb3ff7659e3c55b9a2e40240c9cRob Clark							(so->inputs[n].rasterflat && ctx->so->key.rasterflat))
20828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark						use_ldlv = true;
20838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				}
20848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
20858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				so->inputs[n].bary = true;
20868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
208757fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark				instr = create_frag_input(ctx, use_ldlv);
20888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			}
20898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2090f33083a216563abac307c414bba2e32dc7405feaRob Clark			compile_assert(ctx, idx < ctx->ir->ninputs);
2091f33083a216563abac307c414bba2e32dc7405feaRob Clark
20928fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			ctx->ir->inputs[idx] = instr;
20938fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt		}
20948fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	} else if (ctx->so->type == SHADER_VERTEX) {
20958fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt		for (int i = 0; i < ncomp; i++) {
20968fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			unsigned idx = (n * 4) + i;
2097f33083a216563abac307c414bba2e32dc7405feaRob Clark			compile_assert(ctx, idx < ctx->ir->ninputs);
20988fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			ctx->ir->inputs[idx] = create_input(ctx->block, idx);
20998fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt		}
21008fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	} else {
21018fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt		compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
21028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
21038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
21048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
21058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		so->total_in += ncomp;
21068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
21078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
21088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
21098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
21108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarksetup_output(struct ir3_compile *ctx, nir_variable *out)
21118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
21128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_shader_variant *so = ctx->so;
21138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned array_len = MAX2(glsl_get_length(out->type), 1);
21148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned ncomp = glsl_get_components(out->type);
21158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned n = out->data.driver_location;
21168fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	unsigned slot = out->data.location;
21178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	unsigned comp = 0;
21188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
21198fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	DBG("; out: slot=%u, len=%ux%u, drvloc=%u",
21208fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt			slot, array_len, ncomp, n);
21218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2122784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark	/* let's pretend things other than vec4 don't exist: */
2123784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark	ncomp = MAX2(ncomp, 4);
2124784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark	compile_assert(ctx, ncomp == 4);
2125784086f3c1f50ca78fe62f925dfe66fb3aa5f22cRob Clark
2126c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	if (ctx->so->type == SHADER_FRAGMENT) {
2127c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		switch (slot) {
2128c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case FRAG_RESULT_DEPTH:
2129c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			comp = 2;  /* tgsi will write to .z component */
21308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			so->writes_pos = true;
21318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
2132c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case FRAG_RESULT_COLOR:
2133c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			so->color0_mrt = 1;
21348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
21358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		default:
2136c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			if (slot >= FRAG_RESULT_DATA0)
2137c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark				break;
2138c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			compile_error(ctx, "unknown FS output name: %s\n",
2139c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					gl_frag_result_name(slot));
21408b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
2141c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	} else if (ctx->so->type == SHADER_VERTEX) {
2142c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		switch (slot) {
2143c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_POS:
21448b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			so->writes_pos = true;
21458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			break;
2146c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_PSIZ:
2147c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			so->writes_psize = true;
2148c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			break;
2149c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_COL0:
2150c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_COL1:
2151c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_BFC0:
2152c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_BFC1:
2153c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark		case VARYING_SLOT_FOGC:
215491ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark		case VARYING_SLOT_CLIP_DIST0:
215591ec210ea8e35af8a7b30fa599b67b1faa55f34cRob Clark		case VARYING_SLOT_CLIP_DIST1:
2156f15447e7c9dc1e00973b02098637da0aa74de7d5Rob Clark		case VARYING_SLOT_CLIP_VERTEX:
2157c416ea31cfe50e3d8ac3e433f9e4490a850b8e56Rob Clark			break;
21588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		default:
2159c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			if (slot >= VARYING_SLOT_VAR0)
2160c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark				break;
2161c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7))
2162c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark				break;
2163c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			compile_error(ctx, "unknown VS output name: %s\n",
2164c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark					gl_varying_slot_name(slot));
21658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
21668fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt	} else {
21678fd3e53f3dc40e4013348e63a0cc7a2787410899Eric Anholt		compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
21688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
21698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
21708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
21718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2172c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark	so->outputs[n].slot = slot;
21738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	so->outputs[n].regid = regid(n, comp);
21748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	so->outputs_count = MAX2(so->outputs_count, n + 1);
21758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
21768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (int i = 0; i < ncomp; i++) {
21778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		unsigned idx = (n * 4) + i;
21788623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark		compile_assert(ctx, idx < ctx->ir->noutputs);
2179c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0));
21808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
21818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
21828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
21838623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clarkstatic int
21848623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clarkmax_drvloc(struct exec_list *vars)
21858623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark{
21868623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark	int drvloc = -1;
21878623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark	nir_foreach_variable(var, vars) {
21888623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark		drvloc = MAX2(drvloc, (int)var->data.driver_location);
21898623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark	}
21908623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark	return drvloc;
21918623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark}
21928623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark
21938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
21948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkemit_instructions(struct ir3_compile *ctx)
21958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
2196457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	unsigned ninputs, noutputs;
219793bfa1d7a2e70a72a01c48a04c208845c22f9376Kenneth Graunke	nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);
21988623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark
21998623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark	ninputs  = (max_drvloc(&ctx->s->inputs) + 1) * 4;
22008623e599fc050e33a1e19bc7f5aac59bc7fa3ae3Rob Clark	noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4;
22018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
220296d4db683f90f02e72d34ece544de7eedfa873eeRob Clark	/* or vtx shaders, we need to leave room for sysvals:
22038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
220496d4db683f90f02e72d34ece544de7eedfa873eeRob Clark	if (ctx->so->type == SHADER_VERTEX) {
2205f33083a216563abac307c414bba2e32dc7405feaRob Clark		ninputs += 16;
2206632ea2a1139f4b228ca55331e411dbae9920c28dRob Clark	}
22078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2208c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs);
2209457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
2210457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* Create inputs in first block: */
22118b2d0bb844e4c9b6141f68431b6e6dc135eb3503Rob Clark	ctx->block = get_block(ctx, nir_start_block(fxn));
2212457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ctx->in_block = ctx->block;
2213457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	list_addtail(&ctx->block->node, &ctx->ir->block_list);
22148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
221596d4db683f90f02e72d34ece544de7eedfa873eeRob Clark	if (ctx->so->type == SHADER_VERTEX) {
2216f33083a216563abac307c414bba2e32dc7405feaRob Clark		ctx->ir->ninputs -= 16;
2217632ea2a1139f4b228ca55331e411dbae9920c28dRob Clark	}
22188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* for fragment shader, we have a single input register (usually
22208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * r0.xy) which is used as the base for bary.f varying fetch instrs:
22218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
22228b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (ctx->so->type == SHADER_FRAGMENT) {
22238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		// TODO maybe a helper for fi since we need it a few places..
22248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		struct ir3_instruction *instr;
222538ae05a340bdf526d5da62159223ad9938fea36aRob Clark		instr = ir3_instr_create(ctx->block, OPC_META_FI);
22268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ir3_reg_create(instr, 0, 0);
22278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ir3_reg_create(instr, 0, IR3_REG_SSA);    /* r0.x */
22288b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ir3_reg_create(instr, 0, IR3_REG_SSA);    /* r0.y */
22298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->frag_pos = instr;
22308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
22318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* Setup inputs: */
22338f55ebe802ea930d14eef9cd622aeb9a8d989e01Boyan Ding	nir_foreach_variable(var, &ctx->s->inputs) {
22348b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		setup_input(ctx, var);
22358b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
22368b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* Setup outputs: */
22388f55ebe802ea930d14eef9cd622aeb9a8d989e01Boyan Ding	nir_foreach_variable(var, &ctx->s->outputs) {
22398b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		setup_output(ctx, var);
22404cf4006674bd7c507688316e2033d77066c45c90Rob Clark	}
22414cf4006674bd7c507688316e2033d77066c45c90Rob Clark
2242f20cf22b54dfda13324aa714debe55a8f093640dRob Clark	/* Setup global variables (which should only be arrays): */
22438f55ebe802ea930d14eef9cd622aeb9a8d989e01Boyan Ding	nir_foreach_variable(var, &ctx->s->globals) {
22444cf4006674bd7c507688316e2033d77066c45c90Rob Clark		declare_var(ctx, var);
22458b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
22468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2247f20cf22b54dfda13324aa714debe55a8f093640dRob Clark	/* Setup local variables (which should only be arrays): */
2248f20cf22b54dfda13324aa714debe55a8f093640dRob Clark	/* NOTE: need to do something more clever when we support >1 fxn */
2249f20cf22b54dfda13324aa714debe55a8f093640dRob Clark	nir_foreach_variable(var, &fxn->locals) {
2250f20cf22b54dfda13324aa714debe55a8f093640dRob Clark		declare_var(ctx, var);
2251f20cf22b54dfda13324aa714debe55a8f093640dRob Clark	}
2252f20cf22b54dfda13324aa714debe55a8f093640dRob Clark
2253457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	/* And emit the body: */
2254457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	ctx->impl = fxn;
2255457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	emit_function(ctx, fxn);
2256457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
2257457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
2258457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark		resolve_phis(ctx, block);
22598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
22608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
22618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark/* from NIR perspective, we actually have inputs.  But most of the "inputs"
22638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * for a fragment shader are just bary.f instructions.  The *actual* inputs
22648b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * from the hw perspective are the frag_pos and optionally frag_coord and
22658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark * frag_face.
22668b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark */
22678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkstatic void
22688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkfixup_frag_inputs(struct ir3_compile *ctx)
22698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
22708b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_shader_variant *so = ctx->so;
2271c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	struct ir3 *ir = ctx->ir;
22728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction **inputs;
22738b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction *instr;
22748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	int n, regid = 0;
22758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2276c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ir->ninputs = 0;
22778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	n  = 4;  /* always have frag_pos */
22798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	n += COND(so->frag_face, 4);
22808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	n += COND(so->frag_coord, 4);
22818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
22838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (so->frag_face) {
22858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* this ultimately gets assigned to hr0.x so doesn't conflict
22868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * with frag_coord/frag_pos..
22878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 */
2288c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = ctx->frag_face;
22898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->frag_face->regs[0]->num = 0;
22908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		/* remaining channels not used, but let's avoid confusing
22928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 * other parts that expect inputs to come in groups of vec4
22938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		 */
2294c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = NULL;
2295c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = NULL;
2296c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = NULL;
22978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
22988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
22998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* since we don't know where to set the regid for frag_coord,
23008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * we have to use r0.x for it.  But we don't want to *always*
23018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * use r1.x for frag_pos as that could increase the register
23028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * footprint on simple shaders:
23038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
23048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (so->frag_coord) {
23058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->frag_coord[0]->regs[0]->num = regid++;
23068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->frag_coord[1]->regs[0]->num = regid++;
23078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->frag_coord[2]->regs[0]->num = regid++;
23088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ctx->frag_coord[3]->regs[0]->num = regid++;
23098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2310c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = ctx->frag_coord[0];
2311c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = ctx->frag_coord[1];
2312c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = ctx->frag_coord[2];
2313c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		inputs[ir->ninputs++] = ctx->frag_coord[3];
23148b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
23158b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23168b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* we always have frag_pos: */
23178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	so->pos_regid = regid;
23188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* r0.x */
2320810763deb514c3fec41c3e95761de34e6211d291Rob Clark	instr = create_input(ctx->in_block, ir->ninputs);
23218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr->regs[0]->num = regid++;
2322c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	inputs[ir->ninputs++] = instr;
23238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ctx->frag_pos->regs[1]->instr = instr;
23248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* r0.y */
2326810763deb514c3fec41c3e95761de34e6211d291Rob Clark	instr = create_input(ctx->in_block, ir->ninputs);
23278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	instr->regs[0]->num = regid++;
2328c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	inputs[ir->ninputs++] = instr;
23298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	ctx->frag_pos->regs[2]->instr = instr;
23308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2331c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ir->inputs = inputs;
23328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
23338b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2334adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark/* Fixup tex sampler state for astc/srgb workaround instructions.  We
2335adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark * need to assign the tex state indexes for these after we know the
2336adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark * max tex index.
2337adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark */
2338adf795432f788b33822d3a94b704be4ca536c8f1Rob Clarkstatic void
2339adf795432f788b33822d3a94b704be4ca536c8f1Rob Clarkfixup_astc_srgb(struct ir3_compile *ctx)
2340adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark{
2341adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	struct ir3_shader_variant *so = ctx->so;
2342adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	/* indexed by original tex idx, value is newly assigned alpha sampler
2343adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	 * state tex idx.  Zero is invalid since there is at least one sampler
2344adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	 * if we get here.
2345adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	 */
2346adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	unsigned alt_tex_state[16] = {0};
2347adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	unsigned tex_idx = ctx->max_texture_index + 1;
2348adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	unsigned idx = 0;
2349adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
2350adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	so->astc_srgb.base = tex_idx;
2351adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
2352adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) {
2353adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		struct ir3_instruction *sam = ctx->ir->astc_srgb[i];
2354adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
2355adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state));
2356adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
2357adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		if (alt_tex_state[sam->cat5.tex] == 0) {
2358adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			/* assign new alternate/alpha tex state slot: */
2359adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			alt_tex_state[sam->cat5.tex] = tex_idx++;
2360adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			so->astc_srgb.orig_idx[idx++] = sam->cat5.tex;
2361adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark			so->astc_srgb.count++;
2362adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		}
2363adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
2364adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		sam->cat5.tex = alt_tex_state[sam->cat5.tex];
2365adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	}
2366adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark}
2367adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
23688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkint
2369694beb8b830c993e9bfb744655be3dbd558ab3a8Rob Clarkir3_compile_shader_nir(struct ir3_compiler *compiler,
23700815729d964f4e8e6e263acf70b5b91577de027aRob Clark		struct ir3_shader_variant *so)
23718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark{
23728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_compile *ctx;
2373c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	struct ir3 *ir;
23748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	struct ir3_instruction **inputs;
237557fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	unsigned i, j, actual_in, inloc;
23768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	int ret = 0, max_bary;
23778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	assert(!so->ir);
23798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
238074135f804a4f18040a0a62664df67d35c8090d1dRob Clark	ctx = compile_init(compiler, so);
23818b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (!ctx) {
23828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		DBG("INIT failed!");
23838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ret = -1;
23848b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		goto out;
23858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
23868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	emit_instructions(ctx);
23888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (ctx->error) {
23908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		DBG("EMIT failed!");
23918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		ret = -1;
23928b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		goto out;
23938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
23948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2395c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ir = so->ir = ctx->ir;
23968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
23978b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* keep track of the inputs from TGSI perspective.. */
2398c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	inputs = ir->inputs;
23998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* but fixup actual inputs for frag shader: */
24018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (so->type == SHADER_FRAGMENT)
24028b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		fixup_frag_inputs(ctx);
24038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24048b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* at this point, for binning pass, throw away unneeded outputs: */
24050815729d964f4e8e6e263acf70b5b91577de027aRob Clark	if (so->key.binning_pass) {
24068b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		for (i = 0, j = 0; i < so->outputs_count; i++) {
2407c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			unsigned slot = so->outputs[i].slot;
24088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24098b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			/* throw away everything but first position/psize */
2410c4572b7dfe7a4ae9dc6e900f89786fa9cf7769dfRob Clark			if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
24118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				if (i != j) {
24128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark					so->outputs[j] = so->outputs[i];
2413c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark					ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
2414c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark					ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
2415c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark					ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
2416c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark					ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
24178b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				}
24188b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				j++;
24198b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			}
24208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
24218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		so->outputs_count = j;
2422c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ir->noutputs = j * 4;
24238b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24248b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24258b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* if we want half-precision outputs, mark the output registers
24268b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * as half:
24278b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
24280815729d964f4e8e6e263acf70b5b91577de027aRob Clark	if (so->key.half_precision) {
2429c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		for (i = 0; i < ir->noutputs; i++) {
2430c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark			struct ir3_instruction *out = ir->outputs[i];
2431d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			if (!out)
24328b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				continue;
2433d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			out->regs[0]->flags |= IR3_REG_HALF;
2434d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			/* output could be a fanout (ie. texture fetch output)
2435d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			 * in which case we need to propagate the half-reg flag
2436d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			 * up to the definer so that RA sees it:
2437d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			 */
243819739e4fb9024f42a8fc332e6fa94c292bb6bc16Rob Clark			if (out->opc == OPC_META_FO) {
2439d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark				out = out->regs[1]->instr;
2440d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark				out->regs[0]->flags |= IR3_REG_HALF;
2441d52fb2f5ad828f879286b9068023b82b9897bc17Rob Clark			}
2442457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark
244319739e4fb9024f42a8fc332e6fa94c292bb6bc16Rob Clark			if (out->opc == OPC_MOV) {
2444457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark				out->cat1.dst_type = half_type(out->cat1.dst_type);
2445457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3Rob Clark			}
24468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
24478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24498b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
24508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		printf("BEFORE CP:\n");
2451c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ir3_print(ir);
24528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24538b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2454b15c7fc268785cc8c960368d287ec799fe9dc502Rob Clark	ir3_cp(ir, so);
24558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
24578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		printf("BEFORE GROUPING:\n");
2458c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ir3_print(ir);
24598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24618b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* Group left/right neighbors, inserting mov's where needed to
24628b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 * solve conflicts:
24638b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
2464c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ir3_group(ir);
24658b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2466c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ir3_depth(ir);
24678b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24688b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
24698b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		printf("AFTER DEPTH:\n");
2470c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ir3_print(ir);
24718b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24728b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2473c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ret = ir3_sched(ir);
24748b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (ret) {
24758b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		DBG("SCHED failed!");
24768b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		goto out;
24778b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24788b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24798b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
24808b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		printf("AFTER SCHED:\n");
2481c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ir3_print(ir);
24828b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24838b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2484c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark	ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face);
24858b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (ret) {
24868b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		DBG("RA failed!");
24878b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		goto out;
24888b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24898b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24908b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
24918b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		printf("AFTER RA:\n");
2492c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		ir3_print(ir);
24938b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24948b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
24958b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* fixup input/outputs: */
24968b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (i = 0; i < so->outputs_count; i++) {
2497c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14Rob Clark		so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num;
24988b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
24998b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
25008b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	/* Note that some or all channels of an input may be unused: */
25018b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	actual_in = 0;
250257fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	inloc = 0;
25038b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	for (i = 0; i < so->inputs_count; i++) {
2504832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark		unsigned j, regid = ~0, compmask = 0, maxcomp = 0;
25058b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		so->inputs[i].ncomp = 0;
2506728e2c4d38b2c03ad1fdc997bef70e646ada9fe4Rob Clark		so->inputs[i].inloc = inloc;
25078b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		for (j = 0; j < 4; j++) {
25088b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			struct ir3_instruction *in = inputs[(i*4) + j];
250957fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark			if (in && !(in->flags & IR3_INSTR_UNUSED)) {
25108b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				compmask |= (1 << j);
25118b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				regid = in->regs[0]->num - j;
25128b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				actual_in++;
25138b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark				so->inputs[i].ncomp++;
251457fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark				if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) {
251557fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark					/* assign inloc: */
251657fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark					assert(in->regs[1]->flags & IR3_REG_IMMED);
2517832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark					in->regs[1]->iim_val = inloc + j;
2518832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark					maxcomp = j + 1;
251957fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark				}
25208b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark			}
25218b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		}
2522832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark		if ((so->type == SHADER_FRAGMENT) && compmask && so->inputs[i].bary) {
252357fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark			so->varying_in++;
2524832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark			so->inputs[i].compmask = (1 << maxcomp) - 1;
2525832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark			inloc += maxcomp;
2526832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark		} else {
2527832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark			so->inputs[i].compmask = compmask;
2528832dddcf91f168ab057cb5c7f6914b24ae6b864cRob Clark		}
25298b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		so->inputs[i].regid = regid;
25308b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
25318b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
2532adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark	if (ctx->astc_srgb)
2533adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark		fixup_astc_srgb(ctx);
2534adf795432f788b33822d3a94b704be4ca536c8f1Rob Clark
253557fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	/* We need to do legalize after (for frag shader's) the "bary.f"
253657fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	 * offsets (inloc) have been assigned.
25378b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	 */
253857fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	ir3_legalize(ir, &so->has_samp, &max_bary);
253957fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark
254057fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
254157fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark		printf("AFTER LEGALIZE:\n");
254257fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark		ir3_print(ir);
254357fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	}
254457fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark
254557fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark	/* Note that actual_in counts inputs that are not bary.f'd for FS: */
25468b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (so->type == SHADER_VERTEX)
25478b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		so->total_in = actual_in;
25488b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	else
254957fc0dd8d5610a0a25cece53b172b0c992421db0Rob Clark		so->total_in = max_bary + 1;
25508b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
25518b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clarkout:
25528b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	if (ret) {
25531ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark		if (so->ir)
25541ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2Rob Clark			ir3_destroy(so->ir);
25558b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark		so->ir = NULL;
25568b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	}
25578b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	compile_free(ctx);
25588b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark
25598b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark	return ret;
25608b0b81339b9c3806981ee2ec7c08501bbd8bb7a3Rob Clark}
2561