1/*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "ac_nir_to_llvm.h"
25#include "ac_llvm_util.h"
26#include "ac_binary.h"
27#include "sid.h"
28#include "nir/nir.h"
29#include "../vulkan/radv_descriptor_set.h"
30#include "util/bitscan.h"
31#include <llvm-c/Transforms/Scalar.h>
32
/* Hardware calling-convention IDs understood by the AMDGPU LLVM backend;
 * the numeric values must match the backend's CallingConv enumeration. */
enum radeon_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};

/* AMDGPU target address spaces: 2 = constant memory, 3 = LDS (local). */
#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

/* One slot per possible varying location (each slot is a vec4). */
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)

/* Which part of a combined image/sampler descriptor to load. */
enum desc_type {
	DESC_IMAGE,
	DESC_FMASK,
	DESC_SAMPLER,
	DESC_BUFFER,
};
52
/* All state threaded through one NIR -> LLVM shader translation. */
struct nir_to_llvm_context {
	struct ac_llvm_context ac;
	const struct ac_nir_compiler_options *options;
	struct ac_shader_variant_info *shader_info;

	LLVMContextRef context;
	LLVMModuleRef module;
	LLVMBuilderRef builder;
	LLVMValueRef main_function;

	/* Maps NIR SSA defs / blocks to LLVM values / basic blocks
	 * (see get_src()/get_block()); phis tracks phi nodes. */
	struct hash_table *defs;
	struct hash_table *phis;

	/* Function arguments shared by all stages. */
	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
	LLVMValueRef push_constants;
	/* Compute-stage arguments. */
	LLVMValueRef num_work_groups;
	LLVMValueRef workgroup_ids;
	LLVMValueRef local_invocation_ids;
	LLVMValueRef tg_size;

	/* Vertex-stage arguments. */
	LLVMValueRef vertex_buffers;
	LLVMValueRef base_vertex;
	LLVMValueRef start_instance;
	LLVMValueRef vertex_id;
	LLVMValueRef rel_auto_id;
	LLVMValueRef vs_prim_id;
	LLVMValueRef instance_id;

	/* Fragment-stage arguments (see create_function()). */
	LLVMValueRef prim_mask;
	LLVMValueRef sample_positions;
	LLVMValueRef persp_sample, persp_center, persp_centroid;
	LLVMValueRef linear_sample, linear_center, linear_centroid;
	LLVMValueRef front_face;
	LLVMValueRef ancillary;
	LLVMValueRef frag_pos[4];

	/* Jump targets of the innermost loop being translated. */
	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	/* Cached frequently-used LLVM types. */
	LLVMTypeRef i1;
	LLVMTypeRef i8;
	LLVMTypeRef i16;
	LLVMTypeRef i32;
	LLVMTypeRef i64;
	LLVMTypeRef v2i32;
	LLVMTypeRef v3i32;
	LLVMTypeRef v4i32;
	LLVMTypeRef v8i32;
	LLVMTypeRef f32;
	LLVMTypeRef f16;
	LLVMTypeRef v2f32;
	LLVMTypeRef v4f32;
	LLVMTypeRef v16i8;
	LLVMTypeRef voidt;

	/* Cached frequently-used constants. */
	LLVMValueRef i32zero;
	LLVMValueRef i32one;
	LLVMValueRef f32zero;
	LLVMValueRef f32one;
	LLVMValueRef v4f32empty;

	/* Metadata kind IDs used to annotate loads (see setup_types()). */
	unsigned range_md_kind;
	unsigned uniform_md_kind;
	unsigned invariant_load_md_kind;
	LLVMValueRef empty_md;
	gl_shader_stage stage;

	LLVMValueRef lds;
	/* Per-channel shader inputs/outputs, 4 scalar slots per varying. */
	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];

	LLVMValueRef shared_memory;
	uint64_t input_mask;
	uint64_t output_mask;
	int num_locals;
	LLVMValueRef *locals;
	bool has_ddxy;
	unsigned num_clips;
	unsigned num_culls;

	/* Whether the target supports the ds_bpermute instruction. */
	bool has_ds_bpermute;
};
135
/* Accumulated argument list for one texture-intrinsic emission. */
struct ac_tex_info {
	LLVMValueRef args[12];	/* intrinsic operands collected so far */
	int arg_count;		/* number of valid entries in args[] */
	LLVMTypeRef dst_type;	/* result type of the intrinsic call */
	bool has_offset;	/* whether a texel-offset operand is present */
};
142
143static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
144				     nir_deref_var *deref,
145				     enum desc_type desc_type);
/* Flatten a (vec4 slot, channel) pair into an index for the SoA-ordered
 * inputs/outputs arrays (4 scalar slots per varying). */
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return 4 * index + chan;
}
150
151static unsigned llvm_get_type_size(LLVMTypeRef type)
152{
153	LLVMTypeKind kind = LLVMGetTypeKind(type);
154
155	switch (kind) {
156	case LLVMIntegerTypeKind:
157		return LLVMGetIntTypeWidth(type) / 8;
158	case LLVMFloatTypeKind:
159		return 4;
160	case LLVMPointerTypeKind:
161		return 8;
162	case LLVMVectorTypeKind:
163		return LLVMGetVectorSize(type) *
164		       llvm_get_type_size(LLVMGetElementType(type));
165	default:
166		assert(0);
167		return 0;
168	}
169}
170
171static void set_llvm_calling_convention(LLVMValueRef func,
172                                        gl_shader_stage stage)
173{
174	enum radeon_llvm_calling_convention calling_conv;
175
176	switch (stage) {
177	case MESA_SHADER_VERTEX:
178	case MESA_SHADER_TESS_CTRL:
179	case MESA_SHADER_TESS_EVAL:
180		calling_conv = RADEON_LLVM_AMDGPU_VS;
181		break;
182	case MESA_SHADER_GEOMETRY:
183		calling_conv = RADEON_LLVM_AMDGPU_GS;
184		break;
185	case MESA_SHADER_FRAGMENT:
186		calling_conv = RADEON_LLVM_AMDGPU_PS;
187		break;
188	case MESA_SHADER_COMPUTE:
189		calling_conv = RADEON_LLVM_AMDGPU_CS;
190		break;
191	default:
192		unreachable("Unhandle shader type");
193	}
194
195	LLVMSetFunctionCallConv(func, calling_conv);
196}
197
/* Create the shader's single "main" LLVM function and position the
 * builder at its entry block.
 *
 * return_types/num_return_elems: when nonzero the function returns a
 * packed struct of those types, otherwise void.
 * array_params_mask: bit i set means parameter i is a pointer to a
 * descriptor array; it gets byval + dereferenceable attributes.
 * sgpr_params: the first sgpr_params parameters are passed in SGPRs;
 * the non-array ones are marked inreg.
 * unsafe_math: add fast-math function attributes.
 */
static LLVMValueRef
create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
                     LLVMBuilderRef builder, LLVMTypeRef *return_types,
                     unsigned num_return_elems, LLVMTypeRef *param_types,
                     unsigned param_count, unsigned array_params_mask,
                     unsigned sgpr_params, bool unsafe_math)
{
	LLVMTypeRef main_function_type, ret_type;
	LLVMBasicBlockRef main_function_body;

	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx, return_types,
		                                   num_return_elems, true);
	else
		ret_type = LLVMVoidTypeInContext(ctx);

	/* Setup the function */
	main_function_type =
	    LLVMFunctionType(ret_type, param_types, param_count, 0);
	LLVMValueRef main_function =
	    LLVMAddFunction(module, "main", main_function_type);
	main_function_body =
	    LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
	LLVMPositionBuilderAtEnd(builder, main_function_body);

	/* Default convention; the caller overrides it afterwards via
	 * set_llvm_calling_convention(). */
	LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
	for (unsigned i = 0; i < sgpr_params; ++i) {
		/* Attribute indices are 1-based (0 is the return value). */
		if (array_params_mask & (1 << i)) {
			LLVMValueRef P = LLVMGetParam(main_function, i);
			ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
			ac_add_attr_dereferenceable(P, UINT64_MAX);
		}
		else {
			ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
		}
	}

	if (unsafe_math) {
		/* These were copied from some LLVM test. */
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "less-precise-fpmad",
						   "true");
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "no-infs-fp-math",
						   "true");
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "no-nans-fp-math",
						   "true");
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "unsafe-fp-math",
						   "true");
	}
	return main_function;
}
252
253static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
254{
255	return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
256	                       CONST_ADDR_SPACE);
257}
258
259static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
260					  int idx,
261					  LLVMTypeRef type)
262{
263	LLVMValueRef offset;
264	LLVMValueRef ptr;
265	int addr_space;
266
267	offset = LLVMConstInt(ctx->i32, idx * 16, false);
268
269	ptr = ctx->shared_memory;
270	ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
271	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
272	ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
273	return ptr;
274}
275
276static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
277{
278	LLVMTypeRef type = LLVMTypeOf(v);
279	if (type == ctx->f32) {
280		return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
281	} else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
282		LLVMTypeRef elem_type = LLVMGetElementType(type);
283		if (elem_type == ctx->f32) {
284			LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
285			return LLVMBuildBitCast(ctx->builder, v, nt, "");
286		}
287	}
288	return v;
289}
290
291static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
292{
293	LLVMTypeRef type = LLVMTypeOf(v);
294	if (type == ctx->i32) {
295		return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
296	} else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
297		LLVMTypeRef elem_type = LLVMGetElementType(type);
298		if (elem_type == ctx->i32) {
299			LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
300			return LLVMBuildBitCast(ctx->builder, v, nt, "");
301		}
302	}
303	return v;
304}
305
306static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
307				 LLVMValueRef param, unsigned rshift,
308				 unsigned bitwidth)
309{
310	LLVMValueRef value = param;
311	if (rshift)
312		value = LLVMBuildLShr(ctx->builder, value,
313				      LLVMConstInt(ctx->i32, rshift, false), "");
314
315	if (rshift + bitwidth < 32) {
316		unsigned mask = (1 << bitwidth) - 1;
317		value = LLVMBuildAnd(ctx->builder, value,
318				     LLVMConstInt(ctx->i32, mask, false), "");
319	}
320	return value;
321}
322
323static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
324			       LLVMValueRef base_ptr, LLVMValueRef index)
325{
326	LLVMValueRef indices[2] = {
327		ctx->i32zero,
328		index,
329	};
330	return LLVMBuildGEP(ctx->builder, base_ptr,
331			    indices, 2, "");
332}
333
334static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
335				       LLVMValueRef base_ptr, LLVMValueRef index,
336				       bool uniform)
337{
338	LLVMValueRef pointer;
339	pointer = build_gep0(ctx, base_ptr, index);
340	if (uniform)
341		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
342	return LLVMBuildLoad(ctx->builder, pointer, "");
343}
344
345static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
346					     LLVMValueRef base_ptr, LLVMValueRef index)
347{
348	LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
349	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
350	return result;
351}
352
353static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
354{
355	ud_info->sgpr_idx = sgpr_idx;
356	ud_info->num_sgprs = num_sgprs;
357	ud_info->indirect = false;
358	ud_info->indirect_offset = 0;
359}
360
361static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
362					 int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
363{
364	set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
365}
366
#if 0
/* Currently unused: records a user-data location that is fetched
 * indirectly from memory at `indirect_offset` rather than directly
 * from SGPRs. Kept (compiled out) for future use. */
static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
					   uint32_t indirect_offset)
{
	ud_info->sgpr_idx = sgpr_idx;
	ud_info->num_sgprs = num_sgprs;
	ud_info->indirect = true;
	ud_info->indirect_offset = indirect_offset;
}
#endif
377
/* Declare the shader's LLVM entry point for the current stage: build
 * the parameter list (user SGPRs first, then system SGPRs, then VGPRs),
 * create the function, count SGPR/VGPR inputs for the driver, and
 * record in shader_info->user_sgprs_locs where each user-data item
 * lives so the driver can upload it.
 */
static void create_function(struct nir_to_llvm_context *ctx)
{
	LLVMTypeRef arg_types[23];
	unsigned arg_idx = 0;
	unsigned array_params_mask = 0;
	unsigned sgpr_count = 0, user_sgpr_count;
	unsigned i;
	unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
	unsigned user_sgpr_idx;
	bool need_push_constants;

	/* The push-constant pointer can be omitted when the pipeline
	 * layout declares neither push constants nor dynamic offsets. */
	need_push_constants = true;
	if (!ctx->options->layout)
		need_push_constants = false;
	else if (!ctx->options->layout->push_constant_size &&
		 !ctx->options->layout->dynamic_offset_count)
		need_push_constants = false;

	/* 1 for each descriptor set */
	for (unsigned i = 0; i < num_sets; ++i) {
		if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
			array_params_mask |= (1 << arg_idx);
			arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
		}
	}

	if (need_push_constants) {
		/* 1 for push constants and dynamic descriptors */
		array_params_mask |= (1 << arg_idx);
		arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
	}

	/* Stage-specific arguments. user_sgpr_count covers only user-data
	 * SGPRs; sgpr_count additionally covers system SGPRs; everything
	 * past sgpr_count arrives in VGPRs. */
	switch (ctx->stage) {
	case MESA_SHADER_COMPUTE:
		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
		user_sgpr_count = arg_idx;
		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* workgroup ids */
		arg_types[arg_idx++] = ctx->i32; /* tg size */
		sgpr_count = arg_idx;

		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* local invocation ids */
		break;
	case MESA_SHADER_VERTEX:
		arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
		arg_types[arg_idx++] = ctx->i32; // base vertex
		arg_types[arg_idx++] = ctx->i32; // start instance
		user_sgpr_count = sgpr_count = arg_idx;
		arg_types[arg_idx++] = ctx->i32; // vertex id
		arg_types[arg_idx++] = ctx->i32; // rel auto id
		arg_types[arg_idx++] = ctx->i32; // vs prim id
		arg_types[arg_idx++] = ctx->i32; // instance id
		break;
	case MESA_SHADER_FRAGMENT:
		arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
		user_sgpr_count = arg_idx;
		arg_types[arg_idx++] = ctx->i32; /* prim mask */
		sgpr_count = arg_idx;
		arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
		arg_types[arg_idx++] = ctx->v2i32; /* persp center */
		arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
		arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
		arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
		arg_types[arg_idx++] = ctx->v2i32; /* linear center */
		arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
		arg_types[arg_idx++] = ctx->f32;  /* line stipple tex */
		arg_types[arg_idx++] = ctx->f32;  /* pos x float */
		arg_types[arg_idx++] = ctx->f32;  /* pos y float */
		arg_types[arg_idx++] = ctx->f32;  /* pos z float */
		arg_types[arg_idx++] = ctx->f32;  /* pos w float */
		arg_types[arg_idx++] = ctx->i32;  /* front face */
		arg_types[arg_idx++] = ctx->i32;  /* ancillary */
		arg_types[arg_idx++] = ctx->f32;  /* sample coverage */
		arg_types[arg_idx++] = ctx->i32;  /* fixed pt */
		break;
	default:
		unreachable("Shader stage not implemented");
	}

	ctx->main_function = create_llvm_function(
	    ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
	    arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
	set_llvm_calling_convention(ctx->main_function, ctx->stage);


	/* Count input registers in units of 32-bit dwords. */
	ctx->shader_info->num_input_sgprs = 0;
	ctx->shader_info->num_input_vgprs = 0;

	for (i = 0; i < user_sgpr_count; i++)
		ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;

	ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
	for (; i < sgpr_count; i++)
		ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;

	/* Fragment VGPR inputs are not derived from the arg list here. */
	if (ctx->stage != MESA_SHADER_FRAGMENT)
		for (; i < arg_idx; ++i)
			ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;

	/* Second pass: fetch the LLVM parameters in the same order and
	 * record each user-data item's SGPR location. */
	arg_idx = 0;
	user_sgpr_idx = 0;
	for (unsigned i = 0; i < num_sets; ++i) {
		if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
			/* Each descriptor-set pointer occupies 2 SGPRs (64-bit). */
			set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
			user_sgpr_idx += 2;
			ctx->descriptor_sets[i] =
				LLVMGetParam(ctx->main_function, arg_idx++);
		} else
			ctx->descriptor_sets[i] = NULL;
	}

	if (need_push_constants) {
		ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
		set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
		user_sgpr_idx += 2;
	}

	switch (ctx->stage) {
	case MESA_SHADER_COMPUTE:
		set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
		user_sgpr_idx += 3;
		ctx->num_work_groups =
		    LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->workgroup_ids =
		    LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->tg_size =
		    LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->local_invocation_ids =
		    LLVMGetParam(ctx->main_function, arg_idx++);
		break;
	case MESA_SHADER_VERTEX:
		set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
		user_sgpr_idx += 2;
		ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
		set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
		user_sgpr_idx += 2;
		ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
		break;
	case MESA_SHADER_FRAGMENT:
		set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
		user_sgpr_idx += 2;
		ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
		arg_idx++; /* skip persp pull model */
		ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
		arg_idx++; /* line stipple */
		ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
		ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
		/* Remaining declared args (sample coverage, fixed pt) are
		 * not fetched here. */
		break;
	default:
		unreachable("Shader stage not implemented");
	}
}
544
545static void setup_types(struct nir_to_llvm_context *ctx)
546{
547	LLVMValueRef args[4];
548
549	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
550	ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
551	ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
552	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
553	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
554	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
555	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
556	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
557	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
558	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
559	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
560	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
561	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
562	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
563	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
564
565	ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
566	ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
567	ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
568	ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
569
570	args[0] = ctx->f32zero;
571	args[1] = ctx->f32zero;
572	args[2] = ctx->f32zero;
573	args[3] = ctx->f32one;
574	ctx->v4f32empty = LLVMConstVector(args, 4);
575
576	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
577						      "range", 5);
578	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
579							       "invariant.load", 14);
580	ctx->uniform_md_kind =
581	    LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
582	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
583
584	args[0] = LLVMConstReal(ctx->f32, 2.5);
585}
586
587static int get_llvm_num_components(LLVMValueRef value)
588{
589	LLVMTypeRef type = LLVMTypeOf(value);
590	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
591	                              ? LLVMGetVectorSize(type)
592	                              : 1;
593	return num_components;
594}
595
596static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
597				      LLVMValueRef value,
598				      int index)
599{
600	int count = get_llvm_num_components(value);
601
602	assert(index < count);
603	if (count == 1)
604		return value;
605
606	return LLVMBuildExtractElement(ctx->builder, value,
607				       LLVMConstInt(ctx->i32, index, false), "");
608}
609
610static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
611                                LLVMValueRef value, unsigned count)
612{
613	unsigned num_components = get_llvm_num_components(value);
614	if (count == num_components)
615		return value;
616
617	LLVMValueRef masks[] = {
618	    LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
619	    LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
620
621	if (count == 1)
622		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
623		                               "");
624
625	LLVMValueRef swizzle = LLVMConstVector(masks, count);
626	return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
627}
628
629static void
630build_store_values_extended(struct nir_to_llvm_context *ctx,
631			     LLVMValueRef *values,
632			     unsigned value_count,
633			     unsigned value_stride,
634			     LLVMValueRef vec)
635{
636	LLVMBuilderRef builder = ctx->builder;
637	unsigned i;
638
639	if (value_count == 1) {
640		LLVMBuildStore(builder, vec, values[0]);
641		return;
642	}
643
644	for (i = 0; i < value_count; i++) {
645		LLVMValueRef ptr = values[i * value_stride];
646		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
647		LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
648		LLVMBuildStore(builder, value, ptr);
649	}
650}
651
652static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
653                                nir_ssa_def *def)
654{
655	LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
656	if (def->num_components > 1) {
657		type = LLVMVectorType(type, def->num_components);
658	}
659	return type;
660}
661
662static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
663{
664	assert(src.is_ssa);
665	struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
666	return (LLVMValueRef)entry->data;
667}
668
669
670static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
671                                   struct nir_block *b)
672{
673	struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
674	return (LLVMBasicBlockRef)entry->data;
675}
676
/* Fetch a NIR ALU source and apply its swizzle so the result has
 * exactly num_components components. Negate/abs source modifiers are
 * not handled here (asserted out). */
static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
                                nir_alu_src src,
                                unsigned num_components)
{
	LLVMValueRef value = get_src(ctx, src.src);
	bool need_swizzle = false;

	assert(value);
	LLVMTypeRef type = LLVMTypeOf(value);
	unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
	                              ? LLVMGetVectorSize(type)
	                              : 1;

	/* An identity swizzle needs no shuffle. */
	for (unsigned i = 0; i < num_components; ++i) {
		assert(src.swizzle[i] < src_components);
		if (src.swizzle[i] != i)
			need_swizzle = true;
	}

	if (need_swizzle || num_components != src_components) {
		LLVMValueRef masks[] = {
		    LLVMConstInt(ctx->i32, src.swizzle[0], false),
		    LLVMConstInt(ctx->i32, src.swizzle[1], false),
		    LLVMConstInt(ctx->i32, src.swizzle[2], false),
		    LLVMConstInt(ctx->i32, src.swizzle[3], false)};

		if (src_components > 1 && num_components == 1) {
			/* Vector narrowed to a scalar: extract one lane. */
			value = LLVMBuildExtractElement(ctx->builder, value,
			                                masks[0], "");
		} else if (src_components == 1 && num_components > 1) {
			/* Scalar broadcast into a vector. */
			LLVMValueRef values[] = {value, value, value, value};
			value = ac_build_gather_values(&ctx->ac, values, num_components);
		} else {
			/* General case: shuffle by the swizzle mask. */
			LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
			value = LLVMBuildShuffleVector(ctx->builder, value, value,
		                                       swizzle, "");
		}
	}
	assert(!src.negate);
	assert(!src.abs);
	return value;
}
719
720static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
721                                 LLVMIntPredicate pred, LLVMValueRef src0,
722                                 LLVMValueRef src1)
723{
724	LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
725	return LLVMBuildSelect(ctx->builder, result,
726	                       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
727	                       LLVMConstInt(ctx->i32, 0, false), "");
728}
729
730static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
731                                   LLVMRealPredicate pred, LLVMValueRef src0,
732                                   LLVMValueRef src1)
733{
734	LLVMValueRef result;
735	src0 = to_float(ctx, src0);
736	src1 = to_float(ctx, src1);
737	result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
738	return LLVMBuildSelect(ctx->builder, result,
739	                       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
740	                       LLVMConstInt(ctx->i32, 0, false), "");
741}
742
743static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
744					 const char *intrin,
745					 LLVMValueRef src0)
746{
747	LLVMValueRef params[] = {
748		to_float(ctx, src0),
749	};
750	return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
751}
752
753static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
754				       const char *intrin,
755				       LLVMValueRef src0, LLVMValueRef src1)
756{
757	LLVMValueRef params[] = {
758		to_float(ctx, src0),
759		to_float(ctx, src1),
760	};
761	return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
762}
763
764static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
765					 const char *intrin,
766					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
767{
768	LLVMValueRef params[] = {
769		to_float(ctx, src0),
770		to_float(ctx, src1),
771		to_float(ctx, src2),
772	};
773	return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
774}
775
776static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
777			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
778{
779	LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
780				       ctx->i32zero, "");
781	return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
782}
783
/* findLSB: index of the least significant set bit of src0, via
 * llvm.cttz.i32. */
static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
				  LLVMValueRef src0)
{
	LLVMValueRef params[2] = {
		src0,

		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
		 * add special code to check for x=0. The reason is that
		 * the LLVM behavior for x=0 is different from what we
		 * need here.
		 *
		 * The hardware already implements the correct behavior.
		 */
		LLVMConstInt(ctx->i32, 1, false),
	};
	return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
}
801
/* Signed findMSB via the AMDGPU flbit intrinsic. Inputs 0 and -1 have
 * no significant bit, so they are special-cased to return -1. */
static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
				   LLVMValueRef src0)
{
	LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32",
					       ctx->i32, &src0, 1,
					       AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but NIR wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      src0, ctx->i32zero, ""),
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      src0, all_ones, ""), "");

	return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
}
823
/* Unsigned findMSB via llvm.ctlz.i32 (second operand = 1: result is
 * undef for x = 0, which is handled by the explicit select below);
 * returns -1 when src0 == 0. */
static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
				   LLVMValueRef src0)
{
	LLVMValueRef args[2] = {
		src0,
		ctx->i32one,
	};
	LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32",
					       ctx->i32, args, ARRAY_SIZE(args),
					       AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but NIR wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
					     ctx->i32zero, ""),
			       LLVMConstInt(ctx->i32, -1, true), msb, "");
}
845
846static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
847				    LLVMIntPredicate pred,
848				    LLVMValueRef src0, LLVMValueRef src1)
849{
850	return LLVMBuildSelect(ctx->builder,
851			       LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
852			       src0,
853			       src1, "");
854
855}
856static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
857			      LLVMValueRef src0)
858{
859	return emit_minmax_int(ctx, LLVMIntSGT, src0,
860			       LLVMBuildNeg(ctx->builder, src0, ""));
861}
862
/* Float sign() via two compare+select steps: first map x > 0 to 1.0
 * (otherwise keep x); then keep the result when it is >= 0 (covers 1.0
 * and +/-0.0) and map everything else — negatives and NaN — to -1.0. */
static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
			       LLVMValueRef src0)
{
	LLVMValueRef cmp, val;

	cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
	val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
	cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
	val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
	return val;
}
874
875static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
876			       LLVMValueRef src0)
877{
878	LLVMValueRef cmp, val;
879
880	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
881	val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
882	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
883	val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
884	return val;
885}
886
887static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
888				LLVMValueRef src0)
889{
890	const char *intr = "llvm.floor.f32";
891	LLVMValueRef fsrc0 = to_float(ctx, src0);
892	LLVMValueRef params[] = {
893		fsrc0,
894	};
895	LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr,
896						 ctx->f32, params, 1,
897						 AC_FUNC_ATTR_READNONE);
898	return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
899}
900
/* Carry/borrow-out of an unsigned add/sub: call the overflow intrinsic
 * named by `intrin` (which returns a packed {i32 result, i1 overflow}
 * struct), then extract the i1 flag and zero-extend it to an i32 of
 * value 0 or 1. */
static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
				    const char *intrin,
				    LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMTypeRef ret_type;
	LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
	LLVMValueRef res;
	LLVMValueRef params[] = { src0, src1 };
	ret_type = LLVMStructTypeInContext(ctx->context, types,
					   2, true);

	res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type,
				  params, 2, AC_FUNC_ATTR_READNONE);

	/* Field 1 of the struct is the overflow bit. */
	res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
	res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
	return res;
}
919
920static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
921			     LLVMValueRef src0)
922{
923	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
924}
925
926static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
927				   LLVMValueRef src0, LLVMValueRef src1)
928{
929	LLVMValueRef dst64, result;
930	src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
931	src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
932
933	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
934	dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
935	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
936	return result;
937}
938
939static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
940				   LLVMValueRef src0, LLVMValueRef src1)
941{
942	LLVMValueRef dst64, result;
943	src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
944	src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
945
946	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
947	dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
948	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
949	return result;
950}
951
/* Lower i/ubitfield_extract via the AMDGPU BFE intrinsic.
 * srcs[0] = value, srcs[1] = offset, srcs[2] = bit count.  The hardware
 * BFE only defines widths 0..31, so a count of exactly 32 is special-cased
 * with a select that passes the source value through unchanged. */
static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
					  const char *intrin,
					  LLVMValueRef srcs[3])
{
	LLVMValueRef result;
	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);

	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
	return result;
}
963
/* Lower bitfield_insert: insert the low src3 bits of src1 into src0 at bit
 * offset src2.  bfi_args[0] is the mask ((1 << bits) - 1) << offset,
 * bfi_args[1] the shifted insert value, bfi_args[2] the base. */
static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
					 LLVMValueRef src0, LLVMValueRef src1,
					 LLVMValueRef src2, LLVMValueRef src3)
{
	LLVMValueRef bfi_args[3], result;

	bfi_args[0] = LLVMBuildShl(ctx->builder,
				   LLVMBuildSub(ctx->builder,
						LLVMBuildShl(ctx->builder,
							     ctx->i32one,
							     src3, ""),
						ctx->i32one, ""),
				   src2, "");
	bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
	bfi_args[2] = src0;

	/* bits == 32 means the mask expression above is undefined (shift by
	 * width); handle it separately by selecting the insert value whole. */
	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");

	/* Calculate:
	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	result = LLVMBuildXor(ctx->builder, bfi_args[2],
			      LLVMBuildAnd(ctx->builder, bfi_args[0],
					   LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");

	result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
	return result;
}
993
994static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
995					LLVMValueRef src0)
996{
997	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
998	int i;
999	LLVMValueRef comp[2];
1000
1001	src0 = to_float(ctx, src0);
1002	comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1003	comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1004	for (i = 0; i < 2; i++) {
1005		comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1006		comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1007		comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1008	}
1009
1010	comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1011	comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1012
1013	return comp[0];
1014}
1015
1016static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1017					  LLVMValueRef src0)
1018{
1019	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1020	LLVMValueRef temps[2], result, val;
1021	int i;
1022
1023	for (i = 0; i < 2; i++) {
1024		val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1025		val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1026		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1027		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1028	}
1029
1030	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1031					ctx->i32zero, "");
1032	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1033					ctx->i32one, "");
1034	return result;
1035}
1036
1037/**
1038 * Set range metadata on an instruction.  This can only be used on load and
1039 * call instructions.  If you know an instruction can only produce the values
1040 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1041 * \p lo is the minimum value inclusive.
1042 * \p hi is the maximum value exclusive.
1043 */
1044static void set_range_metadata(struct nir_to_llvm_context *ctx,
1045			       LLVMValueRef value, unsigned lo, unsigned hi)
1046{
1047	LLVMValueRef range_md, md_args[2];
1048	LLVMTypeRef type = LLVMTypeOf(value);
1049	LLVMContextRef context = LLVMGetTypeContext(type);
1050
1051	md_args[0] = LLVMConstInt(type, lo, false);
1052	md_args[1] = LLVMConstInt(type, hi, false);
1053	range_md = LLVMMDNodeInContext(context, md_args, 2);
1054	LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1055}
1056
/* Compute the thread's lane index within the 64-lane wavefront using the
 * mbcnt.lo / mbcnt.hi intrinsic pair, which count set mask bits below the
 * current lane. */
static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef tid;
	LLVMValueRef tid_args[2];
	tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
	tid_args[1] = ctx->i32zero;
	/* mbcnt.lo counts lanes 0..31; its result is the accumulator input of
	 * mbcnt.hi, which adds the count for lanes 32..63. */
	tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
					  "llvm.amdgcn.mbcnt.lo", ctx->i32,
					  tid_args, 2, AC_FUNC_ATTR_READNONE);

	tid = ac_emit_llvm_intrinsic(&ctx->ac,
				  "llvm.amdgcn.mbcnt.hi", ctx->i32,
				  tid_args, 2, AC_FUNC_ATTR_READNONE);
	/* The lane index is always in [0, 64); tell the optimizer. */
	set_range_metadata(ctx, tid, 0, 64);
	return tid;
}
1073
1074/*
1075 * SI implements derivatives using the local data store (LDS)
1076 * All writes to the LDS happen in all executing threads at
1077 * the same time. TID is the Thread ID for the current
1078 * thread and is a value between 0 and 63, representing
1079 * the thread's position in the wavefront.
1080 *
1081 * For the pixel shader threads are grouped into quads of four pixels.
1082 * The TIDs of the pixels of a quad are:
1083 *
1084 *  +------+------+
1085 *  |4n + 0|4n + 1|
1086 *  +------+------+
1087 *  |4n + 2|4n + 3|
1088 *  +------+------+
1089 *
1090 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1091 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1092 * the current pixel's column, and masking with 0xfffffffe yields the TID
1093 * of the left pixel of the current pixel's row.
1094 *
1095 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1096 * adding 2 yields the TID of the pixel below the top pixel.
1097 */
1098/* masks for thread ID. */
1099#define TID_MASK_TOP_LEFT 0xfffffffc
1100#define TID_MASK_TOP      0xfffffffd
1101#define TID_MASK_LEFT     0xfffffffe
/* Emit a screen-space derivative (ddx/ddy, fine or coarse) of src0 by
 * exchanging values between the lanes of a pixel quad — either with the
 * ds_bpermute instruction or, on older chips, through an LDS scratch
 * area — and subtracting the neighboring pixel's value. */
static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
			      nir_op op,
			      LLVMValueRef src0)
{
	LLVMValueRef tl, trbl, result;
	LLVMValueRef tl_tid, trbl_tid;
	LLVMValueRef args[2];
	LLVMValueRef thread_id;
	unsigned mask;
	int idx;
	ctx->has_ddxy = true;

	/* Without ds_bpermute we exchange lane values through a 64-dword LDS
	 * scratch array, one dword per thread in the wave. */
	if (!ctx->lds && !ctx->has_ds_bpermute)
		ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
						       LLVMArrayType(ctx->i32, 64),
						       "ddxy_lds", LOCAL_ADDR_SPACE);

	thread_id = get_thread_id(ctx);
	/* Pick the reference lane of the quad (see the TID layout above). */
	if (op == nir_op_fddx_fine || op == nir_op_fddx)
		mask = TID_MASK_LEFT;
	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
		mask = TID_MASK_TOP;
	else
		mask = TID_MASK_TOP_LEFT;

	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
			      LLVMConstInt(ctx->i32, mask, false), "");
	/* For DDX we want the next X pixel, for DDY the next Y pixel. */
	if (op == nir_op_fddx_fine ||
	    op == nir_op_fddx_coarse ||
	    op == nir_op_fddx)
		idx = 1;
	else
		idx = 2;

	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
				LLVMConstInt(ctx->i32, idx, false), "");

	if (ctx->has_ds_bpermute) {
		/* ds_bpermute addresses are in bytes, hence the *4. */
		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		args[1] = src0;
		tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
					 ctx->i32, args, 2,
					 AC_FUNC_ATTR_READNONE);

		args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
					   ctx->i32, args, 2,
					   AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef store_ptr, load_ptr0, load_ptr1;

		/* Every lane writes its own value, then reads the two quad
		 * neighbors back from LDS. */
		store_ptr = build_gep0(ctx, ctx->lds, thread_id);
		load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
		load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);

		LLVMBuildStore(ctx->builder, src0, store_ptr);
		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
	}
	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
	trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
	result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
	return result;
}
1169
1170/*
1171 * this takes an I,J coordinate pair,
1172 * and works out the X and Y derivatives.
1173 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
1174 */
1175static LLVMValueRef emit_ddxy_interp(
1176	struct nir_to_llvm_context *ctx,
1177	LLVMValueRef interp_ij)
1178{
1179	LLVMValueRef result[4], a;
1180	unsigned i;
1181
1182	for (i = 0; i < 2; i++) {
1183		a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1184					    LLVMConstInt(ctx->i32, i, false), "");
1185		result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1186		result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1187	}
1188	return ac_build_gather_values(&ctx->ac, result, 4);
1189}
1190
/* Translate one NIR ALU instruction to LLVM IR.  Sources are fetched with
 * the per-opcode component count, the opcode is lowered through the big
 * switch below, and the result is stored (bitcast to integer form) in the
 * SSA-def table for later uses. */
static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
{
	LLVMValueRef src[4], result = NULL;
	unsigned num_components = instr->dest.dest.ssa.num_components;
	unsigned src_components;

	assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
	/* A few opcodes consume a different number of components per source
	 * than they produce. */
	switch (instr->op) {
	case nir_op_vec2:
	case nir_op_vec3:
	case nir_op_vec4:
		src_components = 1;
		break;
	case nir_op_pack_half_2x16:
		src_components = 2;
		break;
	case nir_op_unpack_half_2x16:
		src_components = 1;
		break;
	default:
		src_components = num_components;
		break;
	}
	for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
		src[i] = get_alu_src(ctx, instr->src[i], src_components);

	switch (instr->op) {
	case nir_op_fmov:
	case nir_op_imov:
		result = src[0];
		break;
	case nir_op_fneg:
	        src[0] = to_float(ctx, src[0]);
		result = LLVMBuildFNeg(ctx->builder, src[0], "");
		break;
	case nir_op_ineg:
		result = LLVMBuildNeg(ctx->builder, src[0], "");
		break;
	case nir_op_inot:
		result = LLVMBuildNot(ctx->builder, src[0], "");
		break;
	case nir_op_iadd:
		result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_fadd:
		src[0] = to_float(ctx, src[0]);
		src[1] = to_float(ctx, src[1]);
		result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_fsub:
		src[0] = to_float(ctx, src[0]);
		src[1] = to_float(ctx, src[1]);
		result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_isub:
		result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_imul:
		result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_imod:
		/* NOTE(review): SRem has truncated (C-style) semantics, while
		 * NIR's imod is defined with floored semantics for negative
		 * operands — confirm this matches what producers expect. */
		result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_umod:
		result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_fmod:
		/* fmod(x, y) = x - y * floor(x / y) */
		src[0] = to_float(ctx, src[0]);
		src[1] = to_float(ctx, src[1]);
		result = ac_emit_fdiv(&ctx->ac, src[0], src[1]);
		result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
		result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
		result = LLVMBuildFSub(ctx->builder, src[0], result, "");
		break;
	case nir_op_frem:
		src[0] = to_float(ctx, src[0]);
		src[1] = to_float(ctx, src[1]);
		result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_irem:
		result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_idiv:
		result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_udiv:
		result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_fmul:
		src[0] = to_float(ctx, src[0]);
		src[1] = to_float(ctx, src[1]);
		result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_fdiv:
		src[0] = to_float(ctx, src[0]);
		src[1] = to_float(ctx, src[1]);
		result = ac_emit_fdiv(&ctx->ac, src[0], src[1]);
		break;
	case nir_op_frcp:
		src[0] = to_float(ctx, src[0]);
		result = ac_emit_fdiv(&ctx->ac, ctx->f32one, src[0]);
		break;
	case nir_op_iand:
		result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_ior:
		result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_ixor:
		result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_ishl:
		result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_ishr:
		result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_ushr:
		result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
		break;
	case nir_op_ilt:
		result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
		break;
	case nir_op_ine:
		result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
		break;
	case nir_op_ieq:
		result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
		break;
	case nir_op_ige:
		result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
		break;
	case nir_op_ult:
		result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
		break;
	case nir_op_uge:
		result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
		break;
	/* Float comparisons use the unordered predicates (true on NaN). */
	case nir_op_feq:
		result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
		break;
	case nir_op_fne:
		result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
		break;
	case nir_op_flt:
		result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
		break;
	case nir_op_fge:
		result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
		break;
	case nir_op_fabs:
		result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
		break;
	case nir_op_iabs:
		result = emit_iabs(ctx, src[0]);
		break;
	case nir_op_imax:
		result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
		break;
	case nir_op_imin:
		result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
		break;
	case nir_op_umax:
		result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
		break;
	case nir_op_umin:
		result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
		break;
	case nir_op_isign:
		result = emit_isign(ctx, src[0]);
		break;
	case nir_op_fsign:
		src[0] = to_float(ctx, src[0]);
		result = emit_fsign(ctx, src[0]);
		break;
	case nir_op_ffloor:
		result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
		break;
	case nir_op_ftrunc:
		result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
		break;
	case nir_op_fceil:
		result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
		break;
	case nir_op_fround_even:
		result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
		break;
	case nir_op_ffract:
		result = emit_ffract(ctx, src[0]);
		break;
	case nir_op_fsin:
		result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
		break;
	case nir_op_fcos:
		result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
		break;
	case nir_op_fsqrt:
		result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
		break;
	case nir_op_fexp2:
		result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
		break;
	case nir_op_flog2:
		result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
		break;
	case nir_op_frsq:
		/* rsq(x) = 1 / sqrt(x) */
		result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
		result = ac_emit_fdiv(&ctx->ac, ctx->f32one, result);
		break;
	case nir_op_fpow:
		result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
		break;
	case nir_op_fmax:
		result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
		break;
	case nir_op_fmin:
		result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
		break;
	case nir_op_ffma:
		result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
		break;
	case nir_op_ibitfield_extract:
		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
		break;
	case nir_op_ubitfield_extract:
		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
		break;
	case nir_op_bitfield_insert:
		result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
		break;
	case nir_op_bitfield_reverse:
		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
		break;
	case nir_op_bit_count:
		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
		break;
	case nir_op_vec2:
	case nir_op_vec3:
	case nir_op_vec4:
		/* Gather the scalar sources into one vector; everything is
		 * stored in integer form. */
		for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
			src[i] = to_integer(ctx, src[i]);
		result = ac_build_gather_values(&ctx->ac, src, num_components);
		break;
	case nir_op_f2i:
		src[0] = to_float(ctx, src[0]);
		result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
		break;
	case nir_op_f2u:
		src[0] = to_float(ctx, src[0]);
		result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
		break;
	case nir_op_i2f:
		result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
		break;
	case nir_op_u2f:
		result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
		break;
	case nir_op_bcsel:
		result = emit_bcsel(ctx, src[0], src[1], src[2]);
		break;
	case nir_op_find_lsb:
		result = emit_find_lsb(ctx, src[0]);
		break;
	case nir_op_ufind_msb:
		result = emit_ufind_msb(ctx, src[0]);
		break;
	case nir_op_ifind_msb:
		result = emit_ifind_msb(ctx, src[0]);
		break;
	case nir_op_uadd_carry:
		result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
		break;
	case nir_op_usub_borrow:
		result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
		break;
	case nir_op_b2f:
		result = emit_b2f(ctx, src[0]);
		break;
	case nir_op_fquantize2f16:
		/* NOTE(review): f16 denormal results are kept rather than
		 * flushed to zero — verify against the quantize rules the
		 * consumer expects. */
		src[0] = to_float(ctx, src[0]);
		result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
		/* need to convert back up to f32 */
		result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
		break;
	case nir_op_umul_high:
		result = emit_umul_high(ctx, src[0], src[1]);
		break;
	case nir_op_imul_high:
		result = emit_imul_high(ctx, src[0], src[1]);
		break;
	case nir_op_pack_half_2x16:
		result = emit_pack_half_2x16(ctx, src[0]);
		break;
	case nir_op_unpack_half_2x16:
		result = emit_unpack_half_2x16(ctx, src[0]);
		break;
	case nir_op_fddx:
	case nir_op_fddy:
	case nir_op_fddx_fine:
	case nir_op_fddy_fine:
	case nir_op_fddx_coarse:
	case nir_op_fddy_coarse:
		result = emit_ddxy(ctx, instr->op, src[0]);
		break;
	default:
		fprintf(stderr, "Unknown NIR alu instr: ");
		nir_print_instr(&instr->instr, stderr);
		fprintf(stderr, "\n");
		abort();
	}

	if (result) {
		assert(instr->dest.dest.is_ssa);
		/* Defs are recorded in integer form; users bitcast as needed. */
		result = to_integer(ctx, result);
		_mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
		                        result);
	}
}
1509
1510static void visit_load_const(struct nir_to_llvm_context *ctx,
1511                             nir_load_const_instr *instr)
1512{
1513	LLVMValueRef values[4], value = NULL;
1514	LLVMTypeRef element_type =
1515	    LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1516
1517	for (unsigned i = 0; i < instr->def.num_components; ++i) {
1518		switch (instr->def.bit_size) {
1519		case 32:
1520			values[i] = LLVMConstInt(element_type,
1521			                         instr->value.u32[i], false);
1522			break;
1523		case 64:
1524			values[i] = LLVMConstInt(element_type,
1525			                         instr->value.u64[i], false);
1526			break;
1527		default:
1528			fprintf(stderr,
1529			        "unsupported nir load_const bit_size: %d\n",
1530			        instr->def.bit_size);
1531			abort();
1532		}
1533	}
1534	if (instr->def.num_components > 1) {
1535		value = LLVMConstVector(values, instr->def.num_components);
1536	} else
1537		value = values[0];
1538
1539	_mesa_hash_table_insert(ctx->defs, &instr->def, value);
1540}
1541
1542static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1543                             LLVMTypeRef type)
1544{
1545	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1546	return LLVMBuildBitCast(ctx->builder, ptr,
1547	                        LLVMPointerType(type, addr_space), "");
1548}
1549
/* Return the size of the buffer described by \p descriptor (a v4i32 buffer
 * resource).  With \p in_elements, the byte size stored in the descriptor
 * on VI is divided by the element stride so TXQ-style queries get a count
 * in elements. */
static LLVMValueRef
get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
{
	/* num_records lives in dword 2 of the buffer descriptor. */
	LLVMValueRef size =
		LLVMBuildExtractElement(ctx->builder, descriptor,
					LLVMConstInt(ctx->i32, 2, false), "");

	/* VI only */
	if (ctx->options->chip_class >= VI && in_elements) {
		/* On VI, the descriptor contains the size in bytes,
		 * but TXQ must return the size in elements.
		 * The stride is always non-zero for resources using TXQ.
		 */
		/* The stride is the 14-bit field starting at bit 16 of
		 * descriptor dword 1. */
		LLVMValueRef stride =
			LLVMBuildExtractElement(ctx->builder, descriptor,
						LLVMConstInt(ctx->i32, 1, false), "");
		stride = LLVMBuildLShr(ctx->builder, stride,
				       LLVMConstInt(ctx->i32, 16, false), "");
		stride = LLVMBuildAnd(ctx->builder, stride,
				      LLVMConstInt(ctx->i32, 0x3fff, false), "");

		size = LLVMBuildUDiv(ctx->builder, size, stride, "");
	}
	return size;
}
1575
1576/**
1577 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1578 * intrinsic names).
1579 */
1580static void build_int_type_name(
1581	LLVMTypeRef type,
1582	char *buf, unsigned bufsize)
1583{
1584	assert(bufsize >= 6);
1585
1586	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1587		snprintf(buf, bufsize, "v%ui32",
1588			 LLVMGetVectorSize(type));
1589	else
1590		strcpy(buf, "i32");
1591}
1592
/* Workaround for gather4 on integer-format textures: the hardware rounds
 * the coordinate, so query the texture size with getresinfo, nudge the X/Y
 * coordinates by minus half a texel to land exactly on texel centers, then
 * re-emit the gather intrinsic with the adjusted coordinate vector. */
static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
					       struct ac_tex_info *tinfo,
					       nir_tex_instr *instr,
					       const char *intr_name,
					       unsigned coord_vgpr_index)
{
	LLVMValueRef coord = tinfo->args[0];
	LLVMValueRef half_texel[2];
	int c;

	//TODO Rect
	{
		LLVMValueRef txq_args[10];
		int txq_arg_count = 0;
		LLVMValueRef size;
		bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
		txq_args[txq_arg_count++] = tinfo->args[1];
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
		size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
					   txq_args, txq_arg_count,
					   AC_FUNC_ATTR_READNONE);

		/* half_texel[c] = -0.5 / size[c] */
		for (c = 0; c < 2; c++) {
			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
								LLVMConstInt(ctx->i32, c, false), "");
			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
			half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
						      LLVMConstReal(ctx->f32, -0.5), "");
		}
	}

	/* Offset the X/Y coordinate components (at coord_vgpr_index) by the
	 * half-texel amounts; the coordinate dwords hold float bits. */
	for (c = 0; c < 2; c++) {
		LLVMValueRef tmp;
		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
		tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
		tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
		coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
	}

	tinfo->args[0] = coord;
	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);

}
1648
1649static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1650					nir_tex_instr *instr,
1651					struct ac_tex_info *tinfo)
1652{
1653	const char *name = "llvm.SI.image.sample";
1654	const char *infix = "";
1655	char intr_name[127];
1656	char type[64];
1657	bool is_shadow = instr->is_shadow;
1658	bool has_offset = tinfo->has_offset;
1659	switch (instr->op) {
1660	case nir_texop_txf:
1661	case nir_texop_txf_ms:
1662	case nir_texop_samples_identical:
1663		name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
1664		       instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
1665			"llvm.SI.image.load.mip";
1666		is_shadow = false;
1667		has_offset = false;
1668		break;
1669	case nir_texop_txb:
1670		infix = ".b";
1671		break;
1672	case nir_texop_txl:
1673		infix = ".l";
1674		break;
1675	case nir_texop_txs:
1676		name = "llvm.SI.getresinfo";
1677		break;
1678	case nir_texop_query_levels:
1679		name = "llvm.SI.getresinfo";
1680		break;
1681	case nir_texop_tex:
1682		if (ctx->stage != MESA_SHADER_FRAGMENT)
1683			infix = ".lz";
1684		break;
1685	case nir_texop_txd:
1686		infix = ".d";
1687		break;
1688	case nir_texop_tg4:
1689		name = "llvm.SI.gather4";
1690		infix = ".lz";
1691		break;
1692	case nir_texop_lod:
1693		name = "llvm.SI.getlod";
1694		is_shadow = false;
1695		has_offset = false;
1696		break;
1697	default:
1698		break;
1699	}
1700
1701	build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
1702	sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
1703		has_offset ? ".o" : "", type);
1704
1705	if (instr->op == nir_texop_tg4) {
1706		enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1707		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1708			return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1709							  (int)has_offset + (int)is_shadow);
1710		}
1711	}
1712	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1713				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1714
1715}
1716
/* Resolve a vulkan_resource_index intrinsic to a v4i32 buffer descriptor:
 * compute base_offset + index * stride into the descriptor set — or into
 * the push-constant area for dynamic uniform/storage buffers — and load
 * the descriptor through a uniform (scalar) pointer. */
static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
                                                nir_intrinsic_instr *instr)
{
	LLVMValueRef index = get_src(ctx, instr->src[0]);
	unsigned desc_set = nir_intrinsic_desc_set(instr);
	unsigned binding = nir_intrinsic_binding(instr);
	LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
	struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
	struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
	unsigned base_offset = layout->binding[binding].offset;
	LLVMValueRef offset, stride;

	if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
	    layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
		/* Dynamic descriptors live after the push constants, 16 bytes
		 * per dynamic-offset slot. */
		unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
			layout->binding[binding].dynamic_offset_offset;
		desc_ptr = ctx->push_constants;
		base_offset = pipeline_layout->push_constant_size + 16 * idx;
		stride = LLVMConstInt(ctx->i32, 16, false);
	} else
		stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);

	offset = LLVMConstInt(ctx->i32, base_offset, false);
	index = LLVMBuildMul(ctx->builder, index, stride, "");
	offset = LLVMBuildAdd(ctx->builder, offset, index, "");

	desc_ptr = build_gep0(ctx, desc_ptr, offset);
	desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
	/* Mark the load uniform so it can go through the scalar unit. */
	LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);

	return LLVMBuildLoad(ctx->builder, desc_ptr, "");
}
1749
1750static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1751                                             nir_intrinsic_instr *instr)
1752{
1753	LLVMValueRef ptr, addr;
1754
1755	addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
1756	addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
1757
1758	ptr = build_gep0(ctx, ctx->push_constants, addr);
1759	ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1760
1761	return LLVMBuildLoad(ctx->builder, ptr, "");
1762}
1763
1764static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1765                                          nir_intrinsic_instr *instr)
1766{
1767	LLVMValueRef desc = get_src(ctx, instr->src[0]);
1768
1769	return get_buffer_size(ctx, desc, false);
1770}
/* Lower store_ssbo to llvm.amdgcn.buffer.store.* calls.
 * src[0] = data, src[1] = buffer descriptor, src[2] = byte offset.
 * The writemask is walked in consecutive bit ranges; each range becomes
 * one 1-, 2- or 4-component store. */
static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
                             nir_intrinsic_instr *instr)
{
	const char *store_name;
	LLVMTypeRef data_type = ctx->f32;
	unsigned writemask = nir_intrinsic_write_mask(instr);
	LLVMValueRef base_data, base_offset;
	LLVMValueRef params[6];

	if (ctx->stage == MESA_SHADER_FRAGMENT)
		ctx->shader_info->fs.writes_memory = true;

	/* params[0] (data) and params[3] (voffset) are filled per store
	 * inside the loop below. */
	params[1] = get_src(ctx, instr->src[1]);
	params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
	params[4] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
	params[5] = LLVMConstInt(ctx->i1, 0, false);  /* slc */

	if (instr->num_components > 1)
		data_type = LLVMVectorType(ctx->f32, instr->num_components);

	base_data = to_float(ctx, get_src(ctx, instr->src[0]));
	base_data = trim_vector(ctx, base_data, instr->num_components);
	base_data = LLVMBuildBitCast(ctx->builder, base_data,
				     data_type, "");
	base_offset = get_src(ctx, instr->src[2]);      /* voffset */
	while (writemask) {
		int start, count;
		LLVMValueRef data;
		LLVMValueRef offset;
		LLVMValueRef tmp;
		u_bit_scan_consecutive_range(&writemask, &start, &count);

		/* Due to an LLVM limitation, split 3-element writes
		 * into a 2-element and a 1-element write. */
		if (count == 3) {
			/* Re-queue the third element for the next pass. */
			writemask |= 1 << (start + 2);
			count = 2;
		}

		if (count == 4) {
			store_name = "llvm.amdgcn.buffer.store.v4f32";
			data = base_data;
		} else if (count == 2) {
			/* Build a <2 x f32> from components start, start+1. */
			tmp = LLVMBuildExtractElement(ctx->builder,
						      base_data, LLVMConstInt(ctx->i32, start, false), "");
			data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
						      ctx->i32zero, "");

			tmp = LLVMBuildExtractElement(ctx->builder,
						      base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
			data = LLVMBuildInsertElement(ctx->builder, data, tmp,
						      ctx->i32one, "");
			store_name = "llvm.amdgcn.buffer.store.v2f32";

		} else {
			assert(count == 1);
			if (get_llvm_num_components(base_data) > 1)
				data = LLVMBuildExtractElement(ctx->builder, base_data,
							       LLVMConstInt(ctx->i32, start, false), "");
			else
				data = base_data;
			store_name = "llvm.amdgcn.buffer.store.f32";
		}

		/* Advance the byte offset to the first stored component. */
		offset = base_offset;
		if (start != 0) {
			offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
		}
		params[0] = data;
		params[3] = offset;
		ac_emit_llvm_intrinsic(&ctx->ac, store_name,
				       ctx->voidt, params, 6, 0);
	}
}
1845
1846static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1847                                      nir_intrinsic_instr *instr)
1848{
1849	const char *name;
1850	LLVMValueRef params[6];
1851	int arg_count = 0;
1852	if (ctx->stage == MESA_SHADER_FRAGMENT)
1853		ctx->shader_info->fs.writes_memory = true;
1854
1855	if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1856		params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
1857	}
1858	params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
1859	params[arg_count++] = get_src(ctx, instr->src[0]);
1860	params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1861	params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
1862	params[arg_count++] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
1863
1864	switch (instr->intrinsic) {
1865	case nir_intrinsic_ssbo_atomic_add:
1866		name = "llvm.amdgcn.buffer.atomic.add";
1867		break;
1868	case nir_intrinsic_ssbo_atomic_imin:
1869		name = "llvm.amdgcn.buffer.atomic.smin";
1870		break;
1871	case nir_intrinsic_ssbo_atomic_umin:
1872		name = "llvm.amdgcn.buffer.atomic.umin";
1873		break;
1874	case nir_intrinsic_ssbo_atomic_imax:
1875		name = "llvm.amdgcn.buffer.atomic.smax";
1876		break;
1877	case nir_intrinsic_ssbo_atomic_umax:
1878		name = "llvm.amdgcn.buffer.atomic.umax";
1879		break;
1880	case nir_intrinsic_ssbo_atomic_and:
1881		name = "llvm.amdgcn.buffer.atomic.and";
1882		break;
1883	case nir_intrinsic_ssbo_atomic_or:
1884		name = "llvm.amdgcn.buffer.atomic.or";
1885		break;
1886	case nir_intrinsic_ssbo_atomic_xor:
1887		name = "llvm.amdgcn.buffer.atomic.xor";
1888		break;
1889	case nir_intrinsic_ssbo_atomic_exchange:
1890		name = "llvm.amdgcn.buffer.atomic.swap";
1891		break;
1892	case nir_intrinsic_ssbo_atomic_comp_swap:
1893		name = "llvm.amdgcn.buffer.atomic.cmpswap";
1894		break;
1895	default:
1896		abort();
1897	}
1898
1899	return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
1900}
1901
1902static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
1903                                      nir_intrinsic_instr *instr)
1904{
1905	const char *load_name;
1906	LLVMTypeRef data_type = ctx->f32;
1907	if (instr->num_components == 3)
1908		data_type = LLVMVectorType(ctx->f32, 4);
1909	else if (instr->num_components > 1)
1910		data_type = LLVMVectorType(ctx->f32, instr->num_components);
1911
1912	if (instr->num_components == 4 || instr->num_components == 3)
1913		load_name = "llvm.amdgcn.buffer.load.v4f32";
1914	else if (instr->num_components == 2)
1915		load_name = "llvm.amdgcn.buffer.load.v2f32";
1916	else if (instr->num_components == 1)
1917		load_name = "llvm.amdgcn.buffer.load.f32";
1918	else
1919		abort();
1920
1921	LLVMValueRef params[] = {
1922	    get_src(ctx, instr->src[0]),
1923	    LLVMConstInt(ctx->i32, 0, false),
1924	    get_src(ctx, instr->src[1]),
1925	    LLVMConstInt(ctx->i1, 0, false),
1926	    LLVMConstInt(ctx->i1, 0, false),
1927	};
1928
1929	LLVMValueRef ret =
1930	    ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
1931
1932	if (instr->num_components == 3)
1933		ret = trim_vector(ctx, ret, 3);
1934
1935	return LLVMBuildBitCast(ctx->builder, ret,
1936	                        get_def_type(ctx, &instr->dest.ssa), "");
1937}
1938
1939static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
1940                                          nir_intrinsic_instr *instr)
1941{
1942	LLVMValueRef results[4], ret;
1943	LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
1944	LLVMValueRef offset = get_src(ctx, instr->src[1]);
1945
1946	rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
1947
1948	for (unsigned i = 0; i < instr->num_components; ++i) {
1949		LLVMValueRef params[] = {
1950			rsrc,
1951			LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
1952				     offset, "")
1953		};
1954		results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
1955						 params, 2, AC_FUNC_ATTR_READNONE);
1956	}
1957
1958
1959	ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
1960	return LLVMBuildBitCast(ctx->builder, ret,
1961	                        get_def_type(ctx, &instr->dest.ssa), "");
1962}
1963
/* Walk a NIR deref chain and compute the variable offset in units of
 * attribute slots, split into a compile-time part (*const_out) and a
 * dynamic part (*indir_out, NULL when the whole offset is constant).
 *
 * Note: when both parts are present, the constant part is also folded
 * into *indir_out; callers use *const_out only when *indir_out is NULL
 * (see visit_load_var / visit_store_var).
 *
 * vs_in selects vertex-shader-input slot counting in
 * glsl_count_attribute_slots(). */
static void
radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
                      bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
{
	unsigned const_offset = 0;
	LLVMValueRef offset = NULL;


	while (tail->child != NULL) {
		const struct glsl_type *parent_type = tail->type;
		tail = tail->child;

		if (tail->deref_type == nir_deref_type_array) {
			nir_deref_array *deref_array = nir_deref_as_array(tail);
			LLVMValueRef index, stride, local_offset;
			unsigned size = glsl_count_attribute_slots(tail->type, vs_in);

			/* Direct array indices accumulate into the constant part. */
			const_offset += size * deref_array->base_offset;
			if (deref_array->deref_array_type == nir_deref_array_type_direct)
				continue;

			/* Indirect index: offset += indirect * element_size. */
			assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
			index = get_src(ctx, deref_array->indirect);
			stride = LLVMConstInt(ctx->i32, size, 0);
			local_offset = LLVMBuildMul(ctx->builder, stride, index, "");

			if (offset)
				offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
			else
				offset = local_offset;
		} else if (tail->deref_type == nir_deref_type_struct) {
			nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

			/* Sum the slot counts of all fields before this one. */
			for (unsigned i = 0; i < deref_struct->index; i++) {
				const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
				const_offset += glsl_count_attribute_slots(ft, vs_in);
			}
		} else
			unreachable("unsupported deref type");

	}

	/* Fold the constant part into the dynamic offset (see note above). */
	if (const_offset && offset)
		offset = LLVMBuildAdd(ctx->builder, offset,
				      LLVMConstInt(ctx->i32, const_offset, 0),
				      "");

	*const_out = const_offset;
	*indir_out = offset;
}
2014
/* Load a NIR variable (shader input, local, output or shared) and return
 * the value as an integer-typed vector of the destination's component
 * count.  Indirect deref offsets are handled by gathering every candidate
 * slot into a vector and extracting the wanted element at runtime. */
static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
				   nir_intrinsic_instr *instr)
{
	LLVMValueRef values[4];
	int idx = instr->variables[0]->var->data.driver_location;
	int ve = instr->dest.ssa.num_components;
	LLVMValueRef indir_index;
	unsigned const_index;
	switch (instr->variables[0]->var->data.mode) {
	case nir_var_shader_in:
		/* Inputs are SSA values already in ctx->inputs; no load needed. */
		radv_get_deref_offset(ctx, &instr->variables[0]->deref,
				      ctx->stage == MESA_SHADER_VERTEX,
				      &const_index, &indir_index);
		for (unsigned chan = 0; chan < ve; chan++) {
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
						instr->variables[0]->var->type,
						ctx->stage == MESA_SHADER_VERTEX);
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->inputs + idx + chan, count,
						4, false);

				values[chan] = LLVMBuildExtractElement(ctx->builder,
								       tmp_vec,
								       indir_index, "");
			} else
				values[chan] = ctx->inputs[idx + chan + const_index * 4];
		}
		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
		break;
	case nir_var_local:
		/* Locals are pointers in ctx->locals; load each component. */
		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
				      &const_index, &indir_index);
		for (unsigned chan = 0; chan < ve; chan++) {
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
					instr->variables[0]->var->type, false);
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->locals + idx + chan, count,
						4, true);

				values[chan] = LLVMBuildExtractElement(ctx->builder,
								       tmp_vec,
								       indir_index, "");
			} else {
				values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
			}
		}
		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
	case nir_var_shader_out:
		/* Outputs are also pointers; read back the stored values. */
		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
				      &const_index, &indir_index);
		for (unsigned chan = 0; chan < ve; chan++) {
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
						instr->variables[0]->var->type, false);
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->outputs + idx + chan, count,
						4, true);

				values[chan] = LLVMBuildExtractElement(ctx->builder,
								       tmp_vec,
								       indir_index, "");
			} else {
			values[chan] = LLVMBuildLoad(ctx->builder,
						     ctx->outputs[idx + chan + const_index * 4],
						     "");
			}
		}
		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
	case nir_var_shared: {
		/* Shared (LDS) variables: GEP from the base pointer per
		 * component; an indirect index is scaled by 4 (vec4 slots). */
		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
				      &const_index, &indir_index);
		LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
		LLVMValueRef derived_ptr;

		if (indir_index)
			indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");

		for (unsigned chan = 0; chan < ve; chan++) {
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
			if (indir_index)
				index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
			derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
			values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
		}
		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
	}
	default:
		break;
	}
	return NULL;
}
2108
/* Store to a NIR variable (shader output, local or shared), honoring the
 * per-component write mask in const_index[0].  The source is converted to
 * float for output/local storage; shared (LDS) stores keep integers. */
static void
visit_store_var(struct nir_to_llvm_context *ctx,
				   nir_intrinsic_instr *instr)
{
	LLVMValueRef temp_ptr, value;
	int idx = instr->variables[0]->var->data.driver_location;
	LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
	int writemask = instr->const_index[0];
	LLVMValueRef indir_index;
	unsigned const_index;
	switch (instr->variables[0]->var->data.mode) {
	case nir_var_shader_out:
		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
				      &const_index, &indir_index);
		for (unsigned chan = 0; chan < 4; chan++) {
			int stride = 4;
			if (!(writemask & (1 << chan)))
				continue;
			if (get_llvm_num_components(src) == 1)
				value = src;
			else
				value = LLVMBuildExtractElement(ctx->builder, src,
								LLVMConstInt(ctx->i32,
									     chan, false),
								"");

			/* Clip/cull distances are scalar arrays, so their
			 * slots are packed with stride 1 instead of vec4. */
			if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
			    instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
				stride = 1;
			if (indir_index) {
				/* Indirect store: gather the candidate slots,
				 * insert the new value, write everything back. */
				unsigned count = glsl_count_attribute_slots(
						instr->variables[0]->var->type, false);
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->outputs + idx + chan, count,
						stride, true);

				if (get_llvm_num_components(tmp_vec) > 1) {
					tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
									 value, indir_index, "");
				} else
					tmp_vec = value;
				build_store_values_extended(ctx, ctx->outputs + idx + chan,
							    count, stride, tmp_vec);

			} else {
				temp_ptr = ctx->outputs[idx + chan + const_index * stride];

				LLVMBuildStore(ctx->builder, value, temp_ptr);
			}
		}
		break;
	case nir_var_local:
		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
				      &const_index, &indir_index);
		for (unsigned chan = 0; chan < 4; chan++) {
			if (!(writemask & (1 << chan)))
				continue;

			if (get_llvm_num_components(src) == 1)
				value = src;
			else
				value = LLVMBuildExtractElement(ctx->builder, src,
								LLVMConstInt(ctx->i32, chan, false), "");
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
					instr->variables[0]->var->type, false);
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
					&ctx->ac, ctx->locals + idx + chan, count,
					4, true);

				tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
								 value, indir_index, "");
				build_store_values_extended(ctx, ctx->locals + idx + chan,
							    count, 4, tmp_vec);
			} else {
				temp_ptr = ctx->locals[idx + chan + const_index * 4];

				LLVMBuildStore(ctx->builder, value, temp_ptr);
			}
		}
		break;
	case nir_var_shared: {
		LLVMValueRef ptr;
		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
				      &const_index, &indir_index);

		/* Shared (LDS) store: GEP per component from the base
		 * pointer; an indirect index is scaled by 4 (vec4 slots). */
		ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
		LLVMValueRef derived_ptr;

		if (indir_index)
			indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");

		for (unsigned chan = 0; chan < 4; chan++) {
			if (!(writemask & (1 << chan)))
				continue;

			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);

			if (get_llvm_num_components(src) == 1)
				value = src;
			else
				value = LLVMBuildExtractElement(ctx->builder, src,
								LLVMConstInt(ctx->i32,
									     chan, false),
								"");

			if (indir_index)
				index = LLVMBuildAdd(ctx->builder, index, indir_index, "");

			derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
			LLVMBuildStore(ctx->builder,
				       to_integer(ctx, value), derived_ptr);
		}
		break;
	}
	default:
		break;
	}
}
2228
2229static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2230{
2231	switch (dim) {
2232	case GLSL_SAMPLER_DIM_BUF:
2233		return 1;
2234	case GLSL_SAMPLER_DIM_1D:
2235		return array ? 2 : 1;
2236	case GLSL_SAMPLER_DIM_2D:
2237		return array ? 3 : 2;
2238	case GLSL_SAMPLER_DIM_MS:
2239		return array ? 4 : 3;
2240	case GLSL_SAMPLER_DIM_3D:
2241	case GLSL_SAMPLER_DIM_CUBE:
2242		return 3;
2243	case GLSL_SAMPLER_DIM_RECT:
2244	case GLSL_SAMPLER_DIM_SUBPASS:
2245		return 2;
2246	case GLSL_SAMPLER_DIM_SUBPASS_MS:
2247		return 3;
2248	default:
2249		break;
2250	}
2251	return 0;
2252}
2253
/* Assemble the coordinate vector for an image intrinsic from src[0].
 * Subpass inputs are addressed relative to the current fragment position;
 * multisampled images append the sample index from src[1].  A 3-component
 * result is padded to 4 with an undef lane. */
static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
				     nir_intrinsic_instr *instr)
{
	/* For arrays of images, the image type is behind the deref child. */
	const struct glsl_type *type = instr->variables[0]->var->type;
	if(instr->variables[0]->deref.child)
		type = instr->variables[0]->deref.child->type;

	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
	LLVMValueRef coords[4];
	LLVMValueRef masks[] = {
		LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
		LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
	};
	LLVMValueRef res;
	int count;
	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);

	count = image_type_to_components_count(dim,
					       glsl_sampler_type_is_array(type));

	if (count == 1) {
		/* NOTE(review): num_components is never 0 for an SSA def, so
		 * the else branch looks unreachable — confirm against NIR. */
		if (instr->src[0].ssa->num_components)
			res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
		else
			res = src0;
	} else {
		int chan;
		/* The sample index slot is filled separately below. */
		if (is_ms)
			count--;
		for (chan = 0; chan < count; ++chan) {
			coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
		}

		/* Subpass inputs: offset coordinates by the fragment position. */
		if (add_frag_pos) {
			for (chan = 0; chan < count; ++chan)
				coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
		}
		if (is_ms) {
			coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
			count++;
		}

		/* No v3i32 overloads: pad 3 coords to 4 with an undef. */
		if (count == 3) {
			coords[3] = LLVMGetUndef(ctx->i32);
			count = 4;
		}
		res = ac_build_gather_values(&ctx->ac, coords, count);
	}
	return res;
}
2308
2309static void build_type_name_for_intr(
2310        LLVMTypeRef type,
2311        char *buf, unsigned bufsize)
2312{
2313        LLVMTypeRef elem_type = type;
2314
2315        assert(bufsize >= 8);
2316
2317        if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
2318                int ret = snprintf(buf, bufsize, "v%u",
2319                                        LLVMGetVectorSize(type));
2320                if (ret < 0) {
2321                        char *type_name = LLVMPrintTypeToString(type);
2322                        fprintf(stderr, "Error building type name for: %s\n",
2323                                type_name);
2324                        return;
2325                }
2326                elem_type = LLVMGetElementType(type);
2327                buf += ret;
2328                bufsize -= ret;
2329        }
2330        switch (LLVMGetTypeKind(elem_type)) {
2331        default: break;
2332        case LLVMIntegerTypeKind:
2333                snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
2334                break;
2335        case LLVMFloatTypeKind:
2336                snprintf(buf, bufsize, "f32");
2337                break;
2338        case LLVMDoubleTypeKind:
2339                snprintf(buf, bufsize, "f64");
2340                break;
2341        }
2342}
2343
2344static void get_image_intr_name(const char *base_name,
2345                                LLVMTypeRef data_type,
2346                                LLVMTypeRef coords_type,
2347                                LLVMTypeRef rsrc_type,
2348                                char *out_name, unsigned out_len)
2349{
2350        char coords_type_name[8];
2351
2352        build_type_name_for_intr(coords_type, coords_type_name,
2353                            sizeof(coords_type_name));
2354
2355        if (HAVE_LLVM <= 0x0309) {
2356                snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
2357        } else {
2358                char data_type_name[8];
2359                char rsrc_type_name[8];
2360
2361                build_type_name_for_intr(data_type, data_type_name,
2362                                        sizeof(data_type_name));
2363                build_type_name_for_intr(rsrc_type, rsrc_type_name,
2364                                        sizeof(rsrc_type_name));
2365                snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
2366                         data_type_name, coords_type_name, rsrc_type_name);
2367        }
2368}
2369
/* Load from a storage image.  Buffer images go through
 * llvm.amdgcn.buffer.load.format.v4f32; everything else through the
 * type-mangled llvm.amdgcn.image.load.  The result is returned as
 * integers. */
static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
				     nir_intrinsic_instr *instr)
{
	LLVMValueRef params[7];
	LLVMValueRef res;
	char intrinsic_name[64];
	const nir_variable *var = instr->variables[0]->var;
	const struct glsl_type *type = var->type;
	/* For arrays of images, the image type is behind the deref child. */
	if(instr->variables[0]->deref.child)
		type = instr->variables[0]->deref.child->type;

	type = glsl_without_array(type);
	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
		params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
		params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
		params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
		params[3] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
		params[4] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
		res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
					  params, 5, 0);

		res = trim_vector(ctx, res, instr->dest.ssa.num_components);
		res = to_integer(ctx, res);
	} else {
		bool is_da = glsl_sampler_type_is_array(type) ||
			     glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
		LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
		LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
		LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);

		params[0] = get_image_coords(ctx, instr);
		params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
		params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
		/* The trailing flag order differs between LLVM versions. */
		if (HAVE_LLVM <= 0x0309) {
			params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
			params[4] = da;
			params[5] = glc;
			params[6] = slc;
		} else {
			LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
			params[3] = glc;
			params[4] = slc;
			params[5] = lwe;
			params[6] = da;
		}

		get_image_intr_name("llvm.amdgcn.image.load",
				    ctx->v4f32, /* vdata */
				    LLVMTypeOf(params[0]), /* coords */
				    LLVMTypeOf(params[1]), /* rsrc */
				    intrinsic_name, sizeof(intrinsic_name));

		res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
					  params, 7, AC_FUNC_ATTR_READONLY);
	}
	return to_integer(ctx, res);
}
2428
/* Store to a storage image.  Buffer images go through
 * llvm.amdgcn.buffer.store.format.v4f32; everything else through the
 * type-mangled llvm.amdgcn.image.store.  src[2] holds the data. */
static void visit_image_store(struct nir_to_llvm_context *ctx,
			      nir_intrinsic_instr *instr)
{
	LLVMValueRef params[8];
	char intrinsic_name[64];
	const nir_variable *var = instr->variables[0]->var;
	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
	const struct glsl_type *type = glsl_without_array(var->type);

	/* Record that this fragment shader writes memory. */
	if (ctx->stage == MESA_SHADER_FRAGMENT)
		ctx->shader_info->fs.writes_memory = true;

	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
		params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
		params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
		params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
		params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
		params[4] = i1false;  /* glc */
		params[5] = i1false;  /* slc */
		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
				    params, 6, 0);
	} else {
		bool is_da = glsl_sampler_type_is_array(type) ||
			     glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
		LLVMValueRef da = is_da ? i1true : i1false;
		LLVMValueRef glc = i1false;
		LLVMValueRef slc = i1false;

		params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
		params[1] = get_image_coords(ctx, instr); /* coords */
		params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
		params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
		/* The trailing flag order differs between LLVM versions. */
		if (HAVE_LLVM <= 0x0309) {
			params[4] = i1false;  /* r128 */
			params[5] = da;
			params[6] = glc;
			params[7] = slc;
		} else {
			LLVMValueRef lwe = i1false;
			params[4] = glc;
			params[5] = slc;
			params[6] = lwe;
			params[7] = da;
		}

		get_image_intr_name("llvm.amdgcn.image.store",
				    LLVMTypeOf(params[0]), /* vdata */
				    LLVMTypeOf(params[1]), /* coords */
				    LLVMTypeOf(params[2]), /* rsrc */
				    intrinsic_name, sizeof(intrinsic_name));

		ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
				    params, 8, 0);
	}

}
2487
/* Image atomics via llvm.amdgcn.image.atomic.<op>.<coords_type> (or the
 * buffer-image equivalent).  src[2] is the operand, src[3] the swap value
 * for comp_swap.  Returns the pre-op value as an i32. */
static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
                                       nir_intrinsic_instr *instr)
{
	LLVMValueRef params[6];
	int param_count = 0;
	const nir_variable *var = instr->variables[0]->var;
	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
	const char *base_name = "llvm.amdgcn.image.atomic";
	const char *atomic_name;
	LLVMValueRef coords;
	char intrinsic_name[32], coords_type[8];
	const struct glsl_type *type = glsl_without_array(var->type);

	/* Record that this fragment shader writes memory. */
	if (ctx->stage == MESA_SHADER_FRAGMENT)
		ctx->shader_info->fs.writes_memory = true;

	params[param_count++] = get_src(ctx, instr->src[2]);
	if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
		params[param_count++] = get_src(ctx, instr->src[3]);

	/* Remaining parameters differ between buffer and regular images. */
	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
		params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
		coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
									LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
		params[param_count++] = ctx->i32zero; /* voffset */
		params[param_count++] = i1false;  /* glc */
		params[param_count++] = i1false;  /* slc */
	} else {
		bool da = glsl_sampler_type_is_array(type) ||
		          glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;

		coords = params[param_count++] = get_image_coords(ctx, instr);
		params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
		params[param_count++] = i1false; /* r128 */
		params[param_count++] = da ? i1true : i1false;      /* da */
		params[param_count++] = i1false;  /* slc */
	}

	switch (instr->intrinsic) {
	case nir_intrinsic_image_atomic_add:
		atomic_name = "add";
		break;
	case nir_intrinsic_image_atomic_min:
		atomic_name = "smin";
		break;
	case nir_intrinsic_image_atomic_max:
		atomic_name = "smax";
		break;
	case nir_intrinsic_image_atomic_and:
		atomic_name = "and";
		break;
	case nir_intrinsic_image_atomic_or:
		atomic_name = "or";
		break;
	case nir_intrinsic_image_atomic_xor:
		atomic_name = "xor";
		break;
	case nir_intrinsic_image_atomic_exchange:
		atomic_name = "swap";
		break;
	case nir_intrinsic_image_atomic_comp_swap:
		atomic_name = "cmpswap";
		break;
	default:
		abort();
	}
	/* The intrinsic is overloaded on the coordinate type. */
	build_int_type_name(LLVMTypeOf(coords),
			    coords_type, sizeof(coords_type));

	snprintf(intrinsic_name, sizeof(intrinsic_name),
			 "%s.%s.%s", base_name, atomic_name, coords_type);
	return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
}
2562
/* Query the size of a storage image.  Buffer images return the buffer
 * element count; others go through llvm.SI.getresinfo.  For cube arrays
 * the returned depth (z) is divided by 6 to give the layer count. */
static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
				     nir_intrinsic_instr *instr)
{
	LLVMValueRef res;
	LLVMValueRef params[10];
	const nir_variable *var = instr->variables[0]->var;
	const struct glsl_type *type = instr->variables[0]->var->type;
	/* NOTE(review): 'da' is derived from var->type while 'type' below is
	 * re-resolved through the deref child for image arrays — confirm the
	 * two can't disagree for arrays of cube/array images. */
	bool da = glsl_sampler_type_is_array(var->type) ||
	          glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
	if(instr->variables[0]->deref.child)
		type = instr->variables[0]->deref.child->type;

	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
		return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
	params[0] = ctx->i32zero;   /* mip level */
	params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
	params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
	params[3] = ctx->i32zero;
	params[4] = ctx->i32zero;
	params[5] = da ? ctx->i32one : ctx->i32zero;
	params[6] = ctx->i32zero;
	params[7] = ctx->i32zero;
	params[8] = ctx->i32zero;
	params[9] = ctx->i32zero;

	res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
				  params, 10, AC_FUNC_ATTR_READNONE);

	/* Cube arrays report depth in faces; convert to layers (z / 6). */
	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
	    glsl_sampler_type_is_array(type)) {
		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
		z = LLVMBuildSDiv(ctx->builder, z, six, "");
		res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
	}
	return res;
}
2601
2602static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2603{
2604	LLVMValueRef args[1] = {
2605		LLVMConstInt(ctx->i32, 0xf70, false),
2606	};
2607	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
2608			    ctx->voidt, args, 1, 0);
2609}
2610
/* Emit a workgroup execution barrier (s_barrier). */
static void emit_barrier(struct nir_to_llvm_context *ctx)
{
	// TODO tess
	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
			    ctx->voidt, NULL, 0, 0);
}
2617
2618static void emit_discard_if(struct nir_to_llvm_context *ctx,
2619			    nir_intrinsic_instr *instr)
2620{
2621	LLVMValueRef cond;
2622	ctx->shader_info->fs.can_discard = true;
2623
2624	cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2625			     get_src(ctx, instr->src[0]),
2626			     ctx->i32zero, "");
2627
2628	cond = LLVMBuildSelect(ctx->builder, cond,
2629			       LLVMConstReal(ctx->f32, -1.0f),
2630			       ctx->f32zero, "");
2631	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
2632			       ctx->voidt,
2633			       &cond, 1, 0);
2634}
2635
2636static LLVMValueRef
2637visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2638{
2639	LLVMValueRef result;
2640	LLVMValueRef thread_id = get_thread_id(ctx);
2641	result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2642			      LLVMConstInt(ctx->i32, 0xfc0, false), "");
2643
2644	return LLVMBuildAdd(ctx->builder, result, thread_id, "");
2645}
2646
2647static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2648				     nir_intrinsic_instr *instr)
2649{
2650	LLVMValueRef ptr, result;
2651	int idx = instr->variables[0]->var->data.driver_location;
2652	LLVMValueRef src = get_src(ctx, instr->src[0]);
2653	ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2654
2655	if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
2656		LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2657		result = LLVMBuildAtomicCmpXchg(ctx->builder,
2658						ptr, src, src1,
2659						LLVMAtomicOrderingSequentiallyConsistent,
2660						LLVMAtomicOrderingSequentiallyConsistent,
2661						false);
2662	} else {
2663		LLVMAtomicRMWBinOp op;
2664		switch (instr->intrinsic) {
2665		case nir_intrinsic_var_atomic_add:
2666			op = LLVMAtomicRMWBinOpAdd;
2667			break;
2668		case nir_intrinsic_var_atomic_umin:
2669			op = LLVMAtomicRMWBinOpUMin;
2670			break;
2671		case nir_intrinsic_var_atomic_umax:
2672			op = LLVMAtomicRMWBinOpUMax;
2673			break;
2674		case nir_intrinsic_var_atomic_imin:
2675			op = LLVMAtomicRMWBinOpMin;
2676			break;
2677		case nir_intrinsic_var_atomic_imax:
2678			op = LLVMAtomicRMWBinOpMax;
2679			break;
2680		case nir_intrinsic_var_atomic_and:
2681			op = LLVMAtomicRMWBinOpAnd;
2682			break;
2683		case nir_intrinsic_var_atomic_or:
2684			op = LLVMAtomicRMWBinOpOr;
2685			break;
2686		case nir_intrinsic_var_atomic_xor:
2687			op = LLVMAtomicRMWBinOpXor;
2688			break;
2689		case nir_intrinsic_var_atomic_exchange:
2690			op = LLVMAtomicRMWBinOpXchg;
2691			break;
2692		default:
2693			return NULL;
2694		}
2695
2696		result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2697					    LLVMAtomicOrderingSequentiallyConsistent,
2698					    false);
2699	}
2700	return result;
2701}
2702
2703#define INTERP_CENTER 0
2704#define INTERP_CENTROID 1
2705#define INTERP_SAMPLE 2
2706
2707static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2708					enum glsl_interp_mode interp, unsigned location)
2709{
2710	switch (interp) {
2711	case INTERP_MODE_FLAT:
2712	default:
2713		return NULL;
2714	case INTERP_MODE_SMOOTH:
2715	case INTERP_MODE_NONE:
2716		if (location == INTERP_CENTER)
2717			return ctx->persp_center;
2718		else if (location == INTERP_CENTROID)
2719			return ctx->persp_centroid;
2720		else if (location == INTERP_SAMPLE)
2721			return ctx->persp_sample;
2722		break;
2723	case INTERP_MODE_NOPERSPECTIVE:
2724		if (location == INTERP_CENTER)
2725			return ctx->linear_center;
2726		else if (location == INTERP_CENTROID)
2727			return ctx->linear_centroid;
2728		else if (location == INTERP_SAMPLE)
2729			return ctx->linear_sample;
2730		break;
2731	}
2732	return NULL;
2733}
2734
2735static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2736					 LLVMValueRef sample_id)
2737{
2738	/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
2739	LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2740	LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2741	LLVMValueRef result[2];
2742
2743	result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2744	result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2745
2746	return ac_build_gather_values(&ctx->ac, result, 2);
2747}
2748
2749static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2750{
2751	LLVMValueRef values[2];
2752
2753	values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2754	values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2755	return ac_build_gather_values(&ctx->ac, values, 2);
2756}
2757
2758static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2759				 nir_intrinsic_instr *instr)
2760{
2761	LLVMValueRef result[2];
2762	LLVMValueRef interp_param, attr_number;
2763	unsigned location;
2764	unsigned chan;
2765	LLVMValueRef src_c0, src_c1;
2766	const char *intr_name;
2767	LLVMValueRef src0;
2768	int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2769	switch (instr->intrinsic) {
2770	case nir_intrinsic_interp_var_at_centroid:
2771		location = INTERP_CENTROID;
2772		break;
2773	case nir_intrinsic_interp_var_at_sample:
2774	case nir_intrinsic_interp_var_at_offset:
2775		location = INTERP_SAMPLE;
2776		src0 = get_src(ctx, instr->src[0]);
2777		break;
2778	default:
2779		break;
2780	}
2781
2782	if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2783		src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2784		src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
2785	} else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2786		LLVMValueRef sample_position;
2787		LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2788
2789		/* fetch sample ID */
2790		sample_position = load_sample_position(ctx, src0);
2791
2792		src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2793		src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2794		src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2795		src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2796	}
2797	interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2798	attr_number = LLVMConstInt(ctx->i32, input_index, false);
2799
2800	if (location == INTERP_SAMPLE) {
2801		LLVMValueRef ij_out[2];
2802		LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2803
2804		/*
2805		 * take the I then J parameters, and the DDX/Y for it, and
2806		 * calculate the IJ inputs for the interpolator.
2807		 * temp1 = ddx * offset/sample.x + I;
2808		 * interp_param.I = ddy * offset/sample.y + temp1;
2809		 * temp1 = ddx * offset/sample.x + J;
2810		 * interp_param.J = ddy * offset/sample.y + temp1;
2811		 */
2812		for (unsigned i = 0; i < 2; i++) {
2813			LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2814			LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2815			LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2816								      ddxy_out, ix_ll, "");
2817			LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2818								      ddxy_out, iy_ll, "");
2819			LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2820									 interp_param, ix_ll, "");
2821			LLVMValueRef temp1, temp2;
2822
2823			interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2824						     ctx->f32, "");
2825
2826			temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2827			temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2828
2829			temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2830			temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
2831
2832			ij_out[i] = LLVMBuildBitCast(ctx->builder,
2833						     temp2, ctx->i32, "");
2834		}
2835		interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
2836
2837	}
2838	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
2839	for (chan = 0; chan < 2; chan++) {
2840		LLVMValueRef args[4];
2841		LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2842
2843		args[0] = llvm_chan;
2844		args[1] = attr_number;
2845		args[2] = ctx->prim_mask;
2846		args[3] = interp_param;
2847		result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
2848						   ctx->f32, args, args[3] ? 4 : 3,
2849						   AC_FUNC_ATTR_READNONE);
2850	}
2851	return ac_build_gather_values(&ctx->ac, result, 2);
2852}
2853
/* Translate a single NIR intrinsic instruction into LLVM IR.
 *
 * System values come straight from pre-loaded context registers; memory,
 * image and atomic intrinsics are delegated to their visit_* helpers.
 * Intrinsics that produce a value record the result in ctx->defs keyed by
 * the instruction's SSA def; unknown intrinsics are reported on stderr and
 * ignored.
 */
static void visit_intrinsic(struct nir_to_llvm_context *ctx,
                            nir_intrinsic_instr *instr)
{
	LLVMValueRef result = NULL;

	switch (instr->intrinsic) {
	case nir_intrinsic_load_work_group_id: {
		result = ctx->workgroup_ids;
		break;
	}
	case nir_intrinsic_load_base_vertex: {
		result = ctx->base_vertex;
		break;
	}
	case nir_intrinsic_load_vertex_id_zero_base: {
		result = ctx->vertex_id;
		break;
	}
	case nir_intrinsic_load_local_invocation_id: {
		result = ctx->local_invocation_ids;
		break;
	}
	case nir_intrinsic_load_base_instance:
		result = ctx->start_instance;
		break;
	case nir_intrinsic_load_sample_id:
		/* Reading per-sample values forces per-sample shading. */
		ctx->shader_info->fs.force_persample = true;
		result = unpack_param(ctx, ctx->ancillary, 8, 4);
		break;
	case nir_intrinsic_load_sample_pos:
		ctx->shader_info->fs.force_persample = true;
		result = load_sample_pos(ctx);
		break;
	case nir_intrinsic_load_front_face:
		result = ctx->front_face;
		break;
	case nir_intrinsic_load_instance_id:
		result = ctx->instance_id;
		/* Make sure the instance id VGPR is counted as an input. */
		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
		                            ctx->shader_info->vs.vgpr_comp_cnt);
		break;
	case nir_intrinsic_load_num_work_groups:
		result = ctx->num_work_groups;
		break;
	case nir_intrinsic_load_local_invocation_index:
		result = visit_load_local_invocation_index(ctx);
		break;
	case nir_intrinsic_load_push_constant:
		result = visit_load_push_constant(ctx, instr);
		break;
	case nir_intrinsic_vulkan_resource_index:
		result = visit_vulkan_resource_index(ctx, instr);
		break;
	case nir_intrinsic_store_ssbo:
		visit_store_ssbo(ctx, instr);
		break;
	case nir_intrinsic_load_ssbo:
		result = visit_load_buffer(ctx, instr);
		break;
	case nir_intrinsic_ssbo_atomic_add:
	case nir_intrinsic_ssbo_atomic_imin:
	case nir_intrinsic_ssbo_atomic_umin:
	case nir_intrinsic_ssbo_atomic_imax:
	case nir_intrinsic_ssbo_atomic_umax:
	case nir_intrinsic_ssbo_atomic_and:
	case nir_intrinsic_ssbo_atomic_or:
	case nir_intrinsic_ssbo_atomic_xor:
	case nir_intrinsic_ssbo_atomic_exchange:
	case nir_intrinsic_ssbo_atomic_comp_swap:
		result = visit_atomic_ssbo(ctx, instr);
		break;
	case nir_intrinsic_load_ubo:
		result = visit_load_ubo_buffer(ctx, instr);
		break;
	case nir_intrinsic_get_buffer_size:
		result = visit_get_buffer_size(ctx, instr);
		break;
	case nir_intrinsic_load_var:
		result = visit_load_var(ctx, instr);
		break;
	case nir_intrinsic_store_var:
		visit_store_var(ctx, instr);
		break;
	case nir_intrinsic_image_load:
		result = visit_image_load(ctx, instr);
		break;
	case nir_intrinsic_image_store:
		visit_image_store(ctx, instr);
		break;
	case nir_intrinsic_image_atomic_add:
	case nir_intrinsic_image_atomic_min:
	case nir_intrinsic_image_atomic_max:
	case nir_intrinsic_image_atomic_and:
	case nir_intrinsic_image_atomic_or:
	case nir_intrinsic_image_atomic_xor:
	case nir_intrinsic_image_atomic_exchange:
	case nir_intrinsic_image_atomic_comp_swap:
		result = visit_image_atomic(ctx, instr);
		break;
	case nir_intrinsic_image_size:
		result = visit_image_size(ctx, instr);
		break;
	case nir_intrinsic_discard:
		/* Unconditional discard: llvm.AMDGPU.kilp takes no args. */
		ctx->shader_info->fs.can_discard = true;
		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
				       ctx->voidt,
				       NULL, 0, 0);
		break;
	case nir_intrinsic_discard_if:
		emit_discard_if(ctx, instr);
		break;
	case nir_intrinsic_memory_barrier:
		emit_waitcnt(ctx);
		break;
	case nir_intrinsic_barrier:
		emit_barrier(ctx);
		break;
	case nir_intrinsic_var_atomic_add:
	case nir_intrinsic_var_atomic_imin:
	case nir_intrinsic_var_atomic_umin:
	case nir_intrinsic_var_atomic_imax:
	case nir_intrinsic_var_atomic_umax:
	case nir_intrinsic_var_atomic_and:
	case nir_intrinsic_var_atomic_or:
	case nir_intrinsic_var_atomic_xor:
	case nir_intrinsic_var_atomic_exchange:
	case nir_intrinsic_var_atomic_comp_swap:
		result = visit_var_atomic(ctx, instr);
		break;
	case nir_intrinsic_interp_var_at_centroid:
	case nir_intrinsic_interp_var_at_sample:
	case nir_intrinsic_interp_var_at_offset:
		result = visit_interp(ctx, instr);
		break;
	default:
		fprintf(stderr, "Unknown intrinsic: ");
		nir_print_instr(&instr->instr, stderr);
		fprintf(stderr, "\n");
		break;
	}
	if (result) {
		_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
	}
}
2998
/* Load an image, fmask, sampler or buffer descriptor for a texture/image
 * dereference.
 *
 * Walks the Vulkan descriptor set layout to find the byte offset of the
 * binding in the descriptor set, applies an optional (possibly indirect)
 * array index scaled by the binding stride, and loads the descriptor as a
 * v8i32 (image/fmask, 32 bytes) or v4i32 (sampler/buffer, 16 bytes).
 */
static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
					  nir_deref_var *deref,
					  enum desc_type desc_type)
{
	unsigned desc_set = deref->var->data.descriptor_set;
	LLVMValueRef list = ctx->descriptor_sets[desc_set];
	struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
	struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
	unsigned offset = binding->offset;
	unsigned stride = binding->size;
	unsigned type_size;
	LLVMBuilderRef builder = ctx->builder;
	LLVMTypeRef type;
	LLVMValueRef index = NULL;

	assert(deref->var->data.binding < layout->binding_count);

	switch (desc_type) {
	case DESC_IMAGE:
		type = ctx->v8i32;
		type_size = 32;
		break;
	case DESC_FMASK:
		/* The fmask descriptor follows the 32-byte image descriptor. */
		type = ctx->v8i32;
		offset += 32;
		type_size = 32;
		break;
	case DESC_SAMPLER:
		type = ctx->v4i32;
		/* In a combined image+sampler, the sampler lives after the
		 * 64 bytes of image + fmask descriptors. */
		if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
			offset += 64;

		type_size = 16;
		break;
	case DESC_BUFFER:
		type = ctx->v4i32;
		type_size = 16;
		break;
	default:
		unreachable("invalid desc_type\n");
	}

	/* Apply an array deref on the binding, if present.  Constant parts
	 * fold into the byte offset; an indirect part becomes the index. */
	if (deref->deref.child) {
		nir_deref_array *child = (nir_deref_array*)deref->deref.child;

		assert(child->deref_array_type != nir_deref_array_type_wildcard);
		offset += child->base_offset * stride;
		if (child->deref_array_type == nir_deref_array_type_indirect) {
			index = get_src(ctx, child->indirect);
		}
	}

	assert(stride % type_size == 0);

	if (!index)
		index = ctx->i32zero;

	/* Convert the element index from stride units to type_size units. */
	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");

	list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0));
	list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");

	return build_indexed_load_const(ctx, list, index);
}
3063
/* Fill in the argument list and result type for a texture intrinsic.
 *
 * The packed address vector (param, count entries) is padded to a power of
 * two, then followed by the resource pointer, the sampler pointer (for
 * filtered ops), and the fixed image-intrinsic flag operands in hardware
 * order: dmask, unorm, r128, da, glc, slc, tfe, lwe.  Buffer txf uses the
 * shorter buffer-load form instead.
 */
static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
			       struct ac_tex_info *tinfo,
			       nir_tex_instr *instr,
			       nir_texop op,
			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
			       LLVMValueRef *param, unsigned count,
			       unsigned dmask)
{
	int num_args;
	unsigned is_rect = 0;
	/* Array and cube lookups set the "da" (declare array) bit. */
	bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;

	if (op == nir_texop_lod)
		da = false;
	/* Pad to power of two vector */
	while (count < util_next_power_of_two(count))
		param[count++] = LLVMGetUndef(ctx->i32);

	if (count > 1)
		tinfo->args[0] = ac_build_gather_values(&ctx->ac, param, count);
	else
		tinfo->args[0] = param[0];

	tinfo->args[1] = res_ptr;
	num_args = 2;

	/* Unfiltered fetches/queries return raw integers and take no
	 * sampler; everything else returns floats and needs samp_ptr. */
	if (op == nir_texop_txf ||
	    op == nir_texop_txf_ms ||
	    op == nir_texop_query_levels ||
	    op == nir_texop_texture_samples ||
	    op == nir_texop_txs)
		tinfo->dst_type = ctx->v4i32;
	else {
		tinfo->dst_type = ctx->v4f32;
		tinfo->args[num_args++] = samp_ptr;
	}

	/* Buffer texel fetch uses (rsrc, 0, index) instead of the image form. */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
		tinfo->args[0] = res_ptr;
		tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
		tinfo->args[2] = param[0];
		tinfo->arg_count = 3;
		return;
	}

	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */

	tinfo->arg_count = num_args;
}
3120
3121/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3122 *
3123 * SI-CI:
3124 *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3125 *   filtering manually. The driver sets img7 to a mask clearing
3126 *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3127 *     s_and_b32 samp0, samp0, img7
3128 *
3129 * VI:
3130 *   The ANISO_OVERRIDE sampler field enables this fix in TA.
3131 */
3132static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
3133                                           LLVMValueRef res, LLVMValueRef samp)
3134{
3135	LLVMBuilderRef builder = ctx->builder;
3136	LLVMValueRef img7, samp0;
3137
3138	if (ctx->options->chip_class >= VI)
3139		return samp;
3140
3141	img7 = LLVMBuildExtractElement(builder, res,
3142	                               LLVMConstInt(ctx->i32, 7, 0), "");
3143	samp0 = LLVMBuildExtractElement(builder, samp,
3144	                                LLVMConstInt(ctx->i32, 0, 0), "");
3145	samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3146	return LLVMBuildInsertElement(builder, samp, samp0,
3147	                              LLVMConstInt(ctx->i32, 0, 0), "");
3148}
3149
3150static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3151			   nir_tex_instr *instr,
3152			   LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3153			   LLVMValueRef *fmask_ptr)
3154{
3155	if (instr->sampler_dim  == GLSL_SAMPLER_DIM_BUF)
3156		*res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3157	else
3158		*res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3159	if (samp_ptr) {
3160		if (instr->sampler)
3161			*samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3162		else
3163			*samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3164		if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3165			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3166	}
3167	if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3168					     instr->op == nir_texop_samples_identical))
3169		*fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3170}
3171
3172static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
3173				      LLVMValueRef coord)
3174{
3175	coord = to_float(ctx, coord);
3176	coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
3177	coord = to_integer(ctx, coord);
3178	return coord;
3179}
3180
/* Translate a NIR texture instruction into an AMDGPU texture intrinsic.
 *
 * Collects all texture sources, packs the address operands in the order
 * the hardware expects (offset, bias, compare value, derivatives,
 * coordinates, lod/sample index), handles the special query ops
 * (txs on buffers, texture_samples), applies FMASK-based sample-index
 * remapping for MSAA fetches, and finally emits the intrinsic and records
 * the integer-cast result in ctx->defs.
 */
static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
{
	LLVMValueRef result = NULL;
	struct ac_tex_info tinfo = { 0 };
	unsigned dmask = 0xf;
	LLVMValueRef address[16];
	LLVMValueRef coords[5];
	LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
	LLVMValueRef bias = NULL, offsets = NULL;
	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
	LLVMValueRef ddx = NULL, ddy = NULL;
	LLVMValueRef derivs[6];
	unsigned chan, count = 0;
	unsigned const_src = 0, num_deriv_comp = 0;

	tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);

	/* Gather the instruction's sources. */
	for (unsigned i = 0; i < instr->num_srcs; i++) {
		switch (instr->src[i].src_type) {
		case nir_tex_src_coord:
			coord = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_projector:
			break;
		case nir_tex_src_comparator:
			comparator = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_offset:
			offsets = get_src(ctx, instr->src[i].src);
			const_src = i;
			break;
		case nir_tex_src_bias:
			bias = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_lod:
			lod = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_ms_index:
			sample_index = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_ms_mcs:
			break;
		case nir_tex_src_ddx:
			ddx = get_src(ctx, instr->src[i].src);
			num_deriv_comp = instr->src[i].src.ssa->num_components;
			break;
		case nir_tex_src_ddy:
			ddy = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_texture_offset:
		case nir_tex_src_sampler_offset:
		case nir_tex_src_plane:
		default:
			break;
		}
	}

	/* Buffer size query: answered from the descriptor directly. */
	if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
		result = get_buffer_size(ctx, res_ptr, true);
		goto write_result;
	}

	/* textureSamples(): decode dword 3 of the image descriptor.
	 * samples = 1 << field in bits [16:19]; only valid when the type
	 * field in bits [28:31] indicates an MSAA resource, else 1. */
	if (instr->op == nir_texop_texture_samples) {
		LLVMValueRef res, samples, is_msaa;
		res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
		samples = LLVMBuildExtractElement(ctx->builder, res,
						  LLVMConstInt(ctx->i32, 3, false), "");
		is_msaa = LLVMBuildLShr(ctx->builder, samples,
					LLVMConstInt(ctx->i32, 28, false), "");
		is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
				       LLVMConstInt(ctx->i32, 0xe, false), "");
		is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
					LLVMConstInt(ctx->i32, 0xe, false), "");

		samples = LLVMBuildLShr(ctx->builder, samples,
					LLVMConstInt(ctx->i32, 16, false), "");
		samples = LLVMBuildAnd(ctx->builder, samples,
				       LLVMConstInt(ctx->i32, 0xf, false), "");
		samples = LLVMBuildShl(ctx->builder, ctx->i32one,
				       samples, "");
		samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
					  ctx->i32one, "");
		result = samples;
		goto write_result;
	}

	if (coord)
		for (chan = 0; chan < instr->coord_components; chan++)
			coords[chan] = llvm_extract_elem(ctx, coord, chan);

	/* Pack texel offsets: 6 bits per component, packed into one dword
	 * (x in bits [0:5], y in [8:13], z in [16:21]). */
	if (offsets && instr->op != nir_texop_txf) {
		LLVMValueRef offset[3], pack;
		for (chan = 0; chan < 3; ++chan)
			offset[chan] = ctx->i32zero;

		tinfo.has_offset = true;
		for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
			offset[chan] = llvm_extract_elem(ctx, offsets, chan);
			offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
						    LLVMConstInt(ctx->i32, 0x3f, false), "");
			if (chan)
				offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
							    LLVMConstInt(ctx->i32, chan * 8, false), "");
		}
		pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
		pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
		address[count++] = pack;

	}
	/* pack LOD bias value */
	if (instr->op == nir_texop_txb && bias) {
		address[count++] = bias;
	}

	/* Pack depth comparison value */
	if (instr->is_shadow && comparator) {
		address[count++] = llvm_extract_elem(ctx, comparator, 0);
	}

	/* pack derivatives */
	if (ddx || ddy) {
		switch (instr->sampler_dim) {
		case GLSL_SAMPLER_DIM_3D:
		case GLSL_SAMPLER_DIM_CUBE:
			num_deriv_comp = 3;
			break;
		case GLSL_SAMPLER_DIM_2D:
		default:
			num_deriv_comp = 2;
			break;
		case GLSL_SAMPLER_DIM_1D:
			num_deriv_comp = 1;
			break;
		}

		/* Interleave ddx/ddy per component: dx0, dy0, dx1, dy1, ... */
		for (unsigned i = 0; i < num_deriv_comp; i++) {
			derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
			derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
		}
	}

	/* Cube maps: transform (x, y, z) into face coordinates + face id. */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
		for (chan = 0; chan < instr->coord_components; chan++)
			coords[chan] = to_float(ctx, coords[chan]);
		if (instr->coord_components == 3)
			coords[3] = LLVMGetUndef(ctx->f32);
		ac_prepare_cube_coords(&ctx->ac,
			instr->op == nir_texop_txd, instr->is_array,
			coords, derivs);
		if (num_deriv_comp)
			num_deriv_comp--;
	}

	if (ddx || ddy) {
		for (unsigned i = 0; i < num_deriv_comp * 2; i++)
			address[count++] = derivs[i];
	}

	/* Pack texture coordinates */
	if (coord) {
		address[count++] = coords[0];
		if (instr->coord_components > 1) {
			if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
				coords[1] = apply_round_slice(ctx, coords[1]);
			}
			address[count++] = coords[1];
		}
		if (instr->coord_components > 2) {
			/* This seems like a bit of a hack - but it passes Vulkan CTS with it */
			if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
				coords[2] = apply_round_slice(ctx, coords[2]);
			}
			address[count++] = coords[2];
		}
	}

	/* Pack LOD */
	if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
		address[count++] = lod;
	} else if (instr->op == nir_texop_txf_ms && sample_index) {
		address[count++] = sample_index;
	} else if(instr->op == nir_texop_txs) {
		/* txs ignores coordinates; its only address is the LOD. */
		count = 0;
		if (lod)
			address[count++] = lod;
		else
			address[count++] = ctx->i32zero;
	}

	/* The intrinsic takes integer operands; bitcast everything. */
	for (chan = 0; chan < count; chan++) {
		address[chan] = LLVMBuildBitCast(ctx->builder,
						 address[chan], ctx->i32, "");
	}

	/* textureSamplesIdenticalEXT: all samples identical iff the FMASK
	 * value for the texel is 0. */
	if (instr->op == nir_texop_samples_identical) {
		LLVMValueRef txf_address[4];
		struct ac_tex_info txf_info = { 0 };
		unsigned txf_count = count;
		memcpy(txf_address, address, sizeof(txf_address));

		if (!instr->is_array)
			txf_address[2] = ctx->i32zero;
		txf_address[3] = ctx->i32zero;

		set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
				   fmask_ptr, NULL,
				   txf_address, txf_count, 0xf);

		result = build_tex_intrinsic(ctx, instr, &txf_info);

		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
		result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
		goto write_result;
	}

	/* Adjust the sample index according to FMASK.
	 *
	 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
	 * which is the identity mapping. Each nibble says which physical sample
	 * should be fetched to get that sample.
	 *
	 * For example, 0x11111100 means there are only 2 samples stored and
	 * the second sample covers 3/4 of the pixel. When reading samples 0
	 * and 1, return physical sample 0 (determined by the first two 0s
	 * in FMASK), otherwise return physical sample 1.
	 *
	 * The sample index should be adjusted as follows:
	 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
	 */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
	    instr->op != nir_texop_txs) {
		LLVMValueRef txf_address[4];
		struct ac_tex_info txf_info = { 0 };
		unsigned txf_count = count;
		memcpy(txf_address, address, sizeof(txf_address));

		if (!instr->is_array)
			txf_address[2] = ctx->i32zero;
		txf_address[3] = ctx->i32zero;

		set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
				   fmask_ptr, NULL,
				   txf_address, txf_count, 0xf);

		result = build_tex_intrinsic(ctx, instr, &txf_info);
		LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
		LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);

		LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
							     result,
							     ctx->i32zero, "");

		unsigned sample_chan = instr->is_array ? 3 : 2;

		LLVMValueRef sample_index4 =
			LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
		LLVMValueRef shifted_fmask =
			LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
		LLVMValueRef final_sample =
			LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");

		/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
		 * resource descriptor is 0 (invalid),
		 */
		LLVMValueRef fmask_desc =
			LLVMBuildBitCast(ctx->builder, fmask_ptr,
					 ctx->v8i32, "");

		LLVMValueRef fmask_word1 =
			LLVMBuildExtractElement(ctx->builder, fmask_desc,
						ctx->i32one, "");

		LLVMValueRef word1_is_nonzero =
			LLVMBuildICmp(ctx->builder, LLVMIntNE,
				      fmask_word1, ctx->i32zero, "");

		/* Replace the MSAA sample index. */
		address[sample_chan] =
			LLVMBuildSelect(ctx->builder, word1_is_nonzero,
					final_sample, address[sample_chan], "");
	}

	/* For txf, constant offsets are folded directly into the coordinates. */
	if (offsets && instr->op == nir_texop_txf) {
		nir_const_value *const_offset =
			nir_src_as_const_value(instr->src[const_src].src);
		int num_offsets = instr->src[const_src].src.ssa->num_components;
		assert(const_offset);
		num_offsets = MIN2(num_offsets, instr->coord_components);
		if (num_offsets > 2)
			address[2] = LLVMBuildAdd(ctx->builder,
						  address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
		if (num_offsets > 1)
			address[1] = LLVMBuildAdd(ctx->builder,
						  address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
		address[0] = LLVMBuildAdd(ctx->builder,
					  address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");

	}

	/* TODO TG4 support */
	if (instr->op == nir_texop_tg4) {
		if (instr->is_shadow)
			dmask = 1;
		else
			dmask = 1 << instr->component;
	}
	set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
			   res_ptr, samp_ptr, address, count, dmask);

	result = build_tex_intrinsic(ctx, instr, &tinfo);

	/* Post-process the result per op. */
	if (instr->op == nir_texop_query_levels)
		result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
	else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
	else if (instr->op == nir_texop_txs &&
		 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
		 instr->is_array) {
		/* Cube arrays store 6 layers per cube; report cubes, not layers. */
		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
		z = LLVMBuildSDiv(ctx->builder, z, six, "");
		result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
	} else if (instr->dest.ssa.num_components != 4)
		result = trim_vector(ctx, result, instr->dest.ssa.num_components);

write_result:
	if (result) {
		assert(instr->dest.is_ssa);
		result = to_integer(ctx, result);
		_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
	}
}
3514
3515
3516static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3517{
3518	LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3519	LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3520
3521	_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3522	_mesa_hash_table_insert(ctx->phis, instr, result);
3523}
3524
3525static void visit_post_phi(struct nir_to_llvm_context *ctx,
3526                           nir_phi_instr *instr,
3527                           LLVMValueRef llvm_phi)
3528{
3529	nir_foreach_phi_src(src, instr) {
3530		LLVMBasicBlockRef block = get_block(ctx, src->pred);
3531		LLVMValueRef llvm_src = get_src(ctx, src->src);
3532
3533		LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3534	}
3535}
3536
3537static void phi_post_pass(struct nir_to_llvm_context *ctx)
3538{
3539	struct hash_entry *entry;
3540	hash_table_foreach(ctx->phis, entry) {
3541		visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3542		               (LLVMValueRef)entry->data);
3543	}
3544}
3545
3546
3547static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3548			    nir_ssa_undef_instr *instr)
3549{
3550	unsigned num_components = instr->def.num_components;
3551	LLVMValueRef undef;
3552
3553	if (num_components == 1)
3554		undef = LLVMGetUndef(ctx->i32);
3555	else {
3556		undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3557	}
3558	_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3559}
3560
3561static void visit_jump(struct nir_to_llvm_context *ctx,
3562		       nir_jump_instr *instr)
3563{
3564	switch (instr->type) {
3565	case nir_jump_break:
3566		LLVMBuildBr(ctx->builder, ctx->break_block);
3567		LLVMClearInsertionPosition(ctx->builder);
3568		break;
3569	case nir_jump_continue:
3570		LLVMBuildBr(ctx->builder, ctx->continue_block);
3571		LLVMClearInsertionPosition(ctx->builder);
3572		break;
3573	default:
3574		fprintf(stderr, "Unknown NIR jump instr: ");
3575		nir_print_instr(&instr->instr, stderr);
3576		fprintf(stderr, "\n");
3577		abort();
3578	}
3579}
3580
3581static void visit_cf_list(struct nir_to_llvm_context *ctx,
3582                          struct exec_list *list);
3583
/* Translate one NIR basic block: dispatch each instruction to its
 * visit_* handler, then record the NIR-block -> LLVM-block mapping
 * (needed later by visit_post_phi to resolve phi predecessors).
 *
 * Note: the LLVM block is captured *before* emitting instructions,
 * because a jump instruction clears the builder's insertion position. */
static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
{
	LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
	nir_foreach_instr(instr, block)
	{
		switch (instr->type) {
		case nir_instr_type_alu:
			visit_alu(ctx, nir_instr_as_alu(instr));
			break;
		case nir_instr_type_load_const:
			visit_load_const(ctx, nir_instr_as_load_const(instr));
			break;
		case nir_instr_type_intrinsic:
			visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
			break;
		case nir_instr_type_tex:
			visit_tex(ctx, nir_instr_as_tex(instr));
			break;
		case nir_instr_type_phi:
			visit_phi(ctx, nir_instr_as_phi(instr));
			break;
		case nir_instr_type_ssa_undef:
			visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
			break;
		case nir_instr_type_jump:
			visit_jump(ctx, nir_instr_as_jump(instr));
			break;
		default:
			/* Unsupported instruction kinds (e.g. function calls)
			 * are a hard error at this point. */
			fprintf(stderr, "Unknown NIR instr type: ");
			nir_print_instr(instr, stderr);
			fprintf(stderr, "\n");
			abort();
		}
	}

	/* ctx->defs doubles as the block map: keys are nir_block pointers. */
	_mesa_hash_table_insert(ctx->defs, block, llvm_block);
}
3621
/* Translate a NIR if-statement into LLVM control flow.
 *
 * Layout: a conditional branch to if_block / else_block (else_block is
 * the merge block when the else-list is empty), with both arms falling
 * through to merge_block.  The "if (LLVMGetInsertBlock(...))" checks
 * guard against arms that ended in a jump, which clears the builder's
 * insertion position. */
static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
{
	LLVMValueRef value = get_src(ctx, if_stmt->condition);

	LLVMBasicBlockRef merge_block =
	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
	LLVMBasicBlockRef if_block =
	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
	LLVMBasicBlockRef else_block = merge_block;
	if (!exec_list_is_empty(&if_stmt->else_list))
		else_block = LLVMAppendBasicBlockInContext(
		    ctx->context, ctx->main_function, "");

	/* The NIR condition is an i32; compare against zero to get an i1. */
	LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
	                                  LLVMConstInt(ctx->i32, 0, false), "");
	LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);

	LLVMPositionBuilderAtEnd(ctx->builder, if_block);
	visit_cf_list(ctx, &if_stmt->then_list);
	if (LLVMGetInsertBlock(ctx->builder))
		LLVMBuildBr(ctx->builder, merge_block);

	if (!exec_list_is_empty(&if_stmt->else_list)) {
		LLVMPositionBuilderAtEnd(ctx->builder, else_block);
		visit_cf_list(ctx, &if_stmt->else_list);
		if (LLVMGetInsertBlock(ctx->builder))
			LLVMBuildBr(ctx->builder, merge_block);
	}

	LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
}
3653
/* Translate a NIR loop.
 *
 * The loop body jumps back to continue_block (the loop header) unless a
 * break redirected control to break_block.  The parent loop's
 * continue/break targets are saved and restored to support nesting. */
static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
{
	LLVMBasicBlockRef continue_parent = ctx->continue_block;
	LLVMBasicBlockRef break_parent = ctx->break_block;

	ctx->continue_block =
	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
	ctx->break_block =
	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");

	LLVMBuildBr(ctx->builder, ctx->continue_block);
	LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
	visit_cf_list(ctx, &loop->body);

	/* If the body did not end in a jump, close the back edge. */
	if (LLVMGetInsertBlock(ctx->builder))
		LLVMBuildBr(ctx->builder, ctx->continue_block);
	LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);

	ctx->continue_block = continue_parent;
	ctx->break_block = break_parent;
}
3675
3676static void visit_cf_list(struct nir_to_llvm_context *ctx,
3677                          struct exec_list *list)
3678{
3679	foreach_list_typed(nir_cf_node, node, node, list)
3680	{
3681		switch (node->type) {
3682		case nir_cf_node_block:
3683			visit_block(ctx, nir_cf_node_as_block(node));
3684			break;
3685
3686		case nir_cf_node_if:
3687			visit_if(ctx, nir_cf_node_as_if(node));
3688			break;
3689
3690		case nir_cf_node_loop:
3691			visit_loop(ctx, nir_cf_node_as_loop(node));
3692			break;
3693
3694		default:
3695			assert(0);
3696		}
3697	}
3698}
3699
/* Set up a vertex-shader input variable: load each attribute slot via
 * the llvm.SI.vs.load.input intrinsic and store the (integer-cast)
 * channels into ctx->inputs.
 *
 * The buffer index is instance-based or vertex-based depending on the
 * pipeline key's instance_rate_inputs mask. */
static void
handle_vs_input_decl(struct nir_to_llvm_context *ctx,
		     struct nir_variable *variable)
{
	LLVMValueRef t_list_ptr = ctx->vertex_buffers;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef args[3];
	LLVMValueRef input;
	LLVMValueRef buffer_index;
	int index = variable->data.location - VERT_ATTRIB_GENERIC0;
	int idx = variable->data.location;
	unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);

	variable->data.driver_location = idx * 4;

	if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
		buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
					    ctx->start_instance, "");
		/* Instanced fetch needs the instance-id VGPR; bump the
		 * input VGPR component count accordingly. */
		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
		                            ctx->shader_info->vs.vgpr_comp_cnt);
	} else
		buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
					    ctx->base_vertex, "");

	/* One fetch per occupied attribute slot (matrices span several). */
	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
		t_offset = LLVMConstInt(ctx->i32, index + i, false);

		t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
		args[0] = t_list;
		args[1] = LLVMConstInt(ctx->i32, 0, false);
		args[2] = buffer_index;
		input = ac_emit_llvm_intrinsic(&ctx->ac,
			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);

		/* Split the v4f32 result into per-channel integer values. */
		for (unsigned chan = 0; chan < 4; chan++) {
			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
				to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
							input, llvm_chan, ""));
		}
	}
}
3744
3745
/* Interpolate one fragment-shader input attribute into result[0..3].
 *
 * When interp_param is NULL the value is fetched flat via
 * llvm.SI.fs.constant (3 args); otherwise llvm.SI.fs.interp is used
 * with interp_param as the 4th argument. */
static void interp_fs_input(struct nir_to_llvm_context *ctx,
			    unsigned attr,
			    LLVMValueRef interp_param,
			    LLVMValueRef prim_mask,
			    LLVMValueRef result[4])
{
	const char *intr_name;
	LLVMValueRef attr_number;
	unsigned chan;

	attr_number = LLVMConstInt(ctx->i32, attr, false);

	/* fs.constant returns the param from the middle vertex, so it's not
	 * really useful for flat shading. It's meant to be used for custom
	 * interpolation (but the intrinsic can't fetch from the other two
	 * vertices).
	 *
	 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
	 * to do the right thing. The only reason we use fs.constant is that
	 * fs.interp cannot be used on integers, because they can be equal
	 * to NaN.
	 */
	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";

	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef args[4];
		LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);

		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = prim_mask;
		args[3] = interp_param;
		/* args[3] ? 4 : 3 — the constant variant takes no
		 * interpolation parameter. */
		result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
						   ctx->f32, args, args[3] ? 4 : 3,
						  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
	}
}
3783
3784static void
3785handle_fs_input_decl(struct nir_to_llvm_context *ctx,
3786		     struct nir_variable *variable)
3787{
3788	int idx = variable->data.location;
3789	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3790	LLVMValueRef interp;
3791
3792	variable->data.driver_location = idx * 4;
3793	ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
3794
3795	if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
3796		unsigned interp_type;
3797		if (variable->data.sample) {
3798			interp_type = INTERP_SAMPLE;
3799			ctx->shader_info->fs.force_persample = true;
3800		} else if (variable->data.centroid)
3801			interp_type = INTERP_CENTROID;
3802		else
3803			interp_type = INTERP_CENTER;
3804
3805		interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
3806	} else
3807		interp = NULL;
3808
3809	for (unsigned i = 0; i < attrib_count; ++i)
3810		ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
3811
3812}
3813
3814static void
3815handle_shader_input_decl(struct nir_to_llvm_context *ctx,
3816			 struct nir_variable *variable)
3817{
3818	switch (ctx->stage) {
3819	case MESA_SHADER_VERTEX:
3820		handle_vs_input_decl(ctx, variable);
3821		break;
3822	case MESA_SHADER_FRAGMENT:
3823		handle_fs_input_decl(ctx, variable);
3824		break;
3825	default:
3826		break;
3827	}
3828
3829}
3830
3831static void
3832handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
3833		     struct nir_shader *nir)
3834{
3835	unsigned index = 0;
3836	for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
3837		LLVMValueRef interp_param;
3838		LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
3839
3840		if (!(ctx->input_mask & (1ull << i)))
3841			continue;
3842
3843		if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
3844			interp_param = *inputs;
3845			interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
3846					inputs);
3847
3848			if (!interp_param)
3849				ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
3850			++index;
3851		} else if (i == VARYING_SLOT_POS) {
3852			for(int i = 0; i < 3; ++i)
3853				inputs[i] = ctx->frag_pos[i];
3854
3855			inputs[3] = ac_emit_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
3856		}
3857	}
3858	ctx->shader_info->fs.num_interp = index;
3859	if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
3860		ctx->shader_info->fs.has_pcoord = true;
3861	ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
3862}
3863
3864static LLVMValueRef
3865ac_build_alloca(struct nir_to_llvm_context *ctx,
3866                LLVMTypeRef type,
3867                const char *name)
3868{
3869	LLVMBuilderRef builder = ctx->builder;
3870	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
3871	LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
3872	LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
3873	LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
3874	LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
3875	LLVMValueRef res;
3876
3877	if (first_instr) {
3878		LLVMPositionBuilderBefore(first_builder, first_instr);
3879	} else {
3880		LLVMPositionBuilderAtEnd(first_builder, first_block);
3881	}
3882
3883	res = LLVMBuildAlloca(first_builder, type, name);
3884	LLVMBuildStore(builder, LLVMConstNull(type), res);
3885
3886	LLVMDisposeBuilder(first_builder);
3887
3888	return res;
3889}
3890
3891static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
3892					  LLVMTypeRef type,
3893					  const char *name)
3894{
3895	LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
3896	LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
3897	return ptr;
3898}
3899
/* Set up a shader output variable: allocate one f32 alloca per channel
 * per occupied slot and mark the slots in output_mask.
 *
 * For VS clip/cull distances, also record the clip/cull masks and
 * counts; the arrays occupy one slot for <=4 distances, two otherwise. */
static void
handle_shader_output_decl(struct nir_to_llvm_context *ctx,
			  struct nir_variable *variable)
{
	int idx = variable->data.location + variable->data.index;
	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);

	variable->data.driver_location = idx * 4;

	if (ctx->stage == MESA_SHADER_VERTEX) {

		if (idx == VARYING_SLOT_CLIP_DIST0 ||
		    idx == VARYING_SLOT_CULL_DIST0) {
			int length = glsl_get_length(variable->type);
			if (idx == VARYING_SLOT_CLIP_DIST0) {
				ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
				ctx->num_clips = length;
			} else if (idx == VARYING_SLOT_CULL_DIST0) {
				ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
				ctx->num_culls = length;
			}
			/* Distances are packed 4 per slot. */
			if (length > 4)
				attrib_count = 2;
			else
				attrib_count = 1;
		}
	}

	for (unsigned i = 0; i < attrib_count; ++i) {
		for (unsigned chan = 0; chan < 4; chan++) {
			ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
		                       si_build_alloca_undef(ctx, ctx->f32, "");
		}
	}
	ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
}
3936
3937static void
3938setup_locals(struct nir_to_llvm_context *ctx,
3939	     struct nir_function *func)
3940{
3941	int i, j;
3942	ctx->num_locals = 0;
3943	nir_foreach_variable(variable, &func->impl->locals) {
3944		unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3945		variable->data.driver_location = ctx->num_locals * 4;
3946		ctx->num_locals += attrib_count;
3947	}
3948	ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
3949	if (!ctx->locals)
3950	    return;
3951
3952	for (i = 0; i < ctx->num_locals; i++) {
3953		for (j = 0; j < 4; j++) {
3954			ctx->locals[i * 4 + j] =
3955				si_build_alloca_undef(ctx, ctx->f32, "temp");
3956		}
3957	}
3958}
3959
3960static LLVMValueRef
3961emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
3962{
3963	v = to_float(ctx, v);
3964	v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
3965	return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
3966}
3967
3968
3969static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
3970					LLVMValueRef src0, LLVMValueRef src1)
3971{
3972	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
3973	LLVMValueRef comp[2];
3974
3975	comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
3976	comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
3977	comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
3978	return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
3979}
3980
/* Initialize arguments for the shader export intrinsic (llvm.SI.export).
 *
 * args[0] = channel writemask, args[1] = valid-mask flag, args[2] = DONE
 * bit, args[3] = export target, args[4] = COMPR flag, args[5..8] = the
 * four channel values.  For fragment MRT targets, the values are
 * converted/packed according to the color-buffer format from the
 * pipeline key; otherwise they are exported as-is. */
static void
si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
			 LLVMValueRef *values,
			 unsigned target,
			 LLVMValueRef *args)
{
	/* Default is 0xf. Adjusted below depending on the format. */
	args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
	/* Specify whether the EXEC mask represents the valid mask */
	args[1] = LLVMConstInt(ctx->i32, 0, false);

	/* Specify whether this is the last export */
	args[2] = LLVMConstInt(ctx->i32, 0, false);
	/* Specify the target we are exporting */
	args[3] = LLVMConstInt(ctx->i32, target, false);

	args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
	args[5] = LLVMGetUndef(ctx->f32);
	args[6] = LLVMGetUndef(ctx->f32);
	args[7] = LLVMGetUndef(ctx->f32);
	args[8] = LLVMGetUndef(ctx->f32);

	if (!values)
		return;

	if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
		LLVMValueRef val[4];
		unsigned index = target - V_008DFC_SQ_EXP_MRT;
		unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
		bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;

		switch(col_format) {
		case V_028714_SPI_SHADER_ZERO:
			/* Nothing to export: turn this into a null export. */
			args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
			args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
			break;

		case V_028714_SPI_SHADER_32_R:
			args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
			args[5] = values[0];
			break;

		case V_028714_SPI_SHADER_32_GR:
			args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
			args[5] = values[0];
			args[6] = values[1];
			break;

		case V_028714_SPI_SHADER_32_AR:
			/* Writemask 0x9 = R and A channels. */
			args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
			args[5] = values[0];
			args[8] = values[3];
			break;

		case V_028714_SPI_SHADER_FP16_ABGR:
			/* Compressed export: two f16 pairs in args[5..6]. */
			args[4] = ctx->i32one;

			for (unsigned chan = 0; chan < 2; chan++) {
				LLVMValueRef pack_args[2] = {
					values[2 * chan],
					values[2 * chan + 1]
				};
				LLVMValueRef packed;

				packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
							     ctx->i32, pack_args, 2,
							     AC_FUNC_ATTR_READNONE);
				args[chan + 5] = packed;
			}
			break;

		case V_028714_SPI_SHADER_UNORM16_ABGR:
			/* Scale [0,1] floats to u16 with round-to-nearest. */
			for (unsigned chan = 0; chan < 4; chan++) {
				val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
				val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
							LLVMConstReal(ctx->f32, 65535), "");
				val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
							LLVMConstReal(ctx->f32, 0.5), "");
				val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
							ctx->i32, "");
			}

			args[4] = ctx->i32one;
			args[5] = emit_pack_int16(ctx, val[0], val[1]);
			args[6] = emit_pack_int16(ctx, val[2], val[3]);
			break;

		case V_028714_SPI_SHADER_SNORM16_ABGR:
			/* Scale [-1,1] floats to s16 with round-half-away. */
			for (unsigned chan = 0; chan < 4; chan++) {
				val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
				val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
							LLVMConstReal(ctx->f32, 32767), "");

				/* If positive, add 0.5, else add -0.5. */
				val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
						LLVMBuildSelect(ctx->builder,
							LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
								val[chan], ctx->f32zero, ""),
							LLVMConstReal(ctx->f32, 0.5),
							LLVMConstReal(ctx->f32, -0.5), ""), "");
				val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
			}

			args[4] = ctx->i32one;
			args[5] = emit_pack_int16(ctx, val[0], val[1]);
			args[6] = emit_pack_int16(ctx, val[2], val[3]);
			break;

		case V_028714_SPI_SHADER_UINT16_ABGR: {
			/* Clamp to the u8/u16 range, then pack pairwise. */
			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);

			for (unsigned chan = 0; chan < 4; chan++) {
				val[chan] = to_integer(ctx, values[chan]);
				val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
			}

			args[4] = ctx->i32one;
			args[5] = emit_pack_int16(ctx, val[0], val[1]);
			args[6] = emit_pack_int16(ctx, val[2], val[3]);
			break;
		}

		case V_028714_SPI_SHADER_SINT16_ABGR: {
			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
			LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);

			/* Clamp. */
			for (unsigned chan = 0; chan < 4; chan++) {
				val[chan] = to_integer(ctx, values[chan]);
				val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
				val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
			}

			args[4] = ctx->i32one;
			args[5] = emit_pack_int16(ctx, val[0], val[1]);
			args[6] = emit_pack_int16(ctx, val[2], val[3]);
			break;
		}

		default:
		case V_028714_SPI_SHADER_32_ABGR:
			memcpy(&args[5], values, sizeof(values[0]) * 4);
			break;
		}
	} else
		memcpy(&args[5], values, sizeof(values[0]) * 4);

	/* The export intrinsic takes f32 operands; bitcast as needed. */
	for (unsigned i = 5; i < 9; ++i)
		args[i] = to_float(ctx, args[i]);
}
4132
/* Emit all vertex-shader exports after the body has been translated:
 * position (and clip/cull distance) exports to the POS targets, generic
 * varyings to PARAM targets, plus the derived VS info (pos/param export
 * counts, writes_pointsize/layer/viewport). */
static void
handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
{
	uint32_t param_count = 0;
	unsigned target;
	unsigned pos_idx, num_pos_exports = 0;
	LLVMValueRef args[9];
	/* Position exports are buffered so targets can be renumbered
	 * contiguously and the last one gets the DONE bit. */
	LLVMValueRef pos_args[4][9] = { { 0 } };
	LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
	int i;
	const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
						       (1ull << VARYING_SLOT_CLIP_DIST1) |
						       (1ull << VARYING_SLOT_CULL_DIST0) |
						       (1ull << VARYING_SLOT_CULL_DIST1));

	if (clip_mask) {
		LLVMValueRef slots[8];
		unsigned j;

		/* Cull distances are packed after the clip distances. */
		if (ctx->shader_info->vs.cull_dist_mask)
			ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;

		i = VARYING_SLOT_CLIP_DIST0;
		for (j = 0; j < ctx->num_clips; j++)
			slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
							       ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
		i = VARYING_SLOT_CULL_DIST0;
		for (j = 0; j < ctx->num_culls; j++)
			slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
									   ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));

		for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
			slots[i] = LLVMGetUndef(ctx->f32);

		/* Distances 4..7 go to POS+3, 0..3 to POS+2. */
		if (ctx->num_clips + ctx->num_culls > 4) {
			target = V_008DFC_SQ_EXP_POS + 3;
			si_llvm_init_export_args(ctx, &slots[4], target, args);
			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
			       args, sizeof(args));
		}

		target = V_008DFC_SQ_EXP_POS + 2;
		si_llvm_init_export_args(ctx, &slots[0], target, args);
		memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
		       args, sizeof(args));

	}

	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
		LLVMValueRef values[4];
		if (!(ctx->output_mask & (1ull << i)))
			continue;

		for (unsigned j = 0; j < 4; j++)
			values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
					      ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));

		/* NOTE(review): 'target' is only assigned for POS and
		 * VAR0+ slots; other declared builtin slots would fall
		 * through with 'target' uninitialized — presumably such
		 * outputs never occur here. TODO confirm. */
		if (i == VARYING_SLOT_POS) {
			target = V_008DFC_SQ_EXP_POS;
		} else if (i == VARYING_SLOT_CLIP_DIST0 ||
			   i == VARYING_SLOT_CLIP_DIST1 ||
			   i == VARYING_SLOT_CULL_DIST0 ||
			   i == VARYING_SLOT_CULL_DIST1) {
			continue;
		} else if (i == VARYING_SLOT_PSIZ) {
			ctx->shader_info->vs.writes_pointsize = true;
			psize_value = values[0];
			continue;
		} else if (i == VARYING_SLOT_LAYER) {
			ctx->shader_info->vs.writes_layer = true;
			layer_value = values[0];
			continue;
		} else if (i == VARYING_SLOT_VIEWPORT) {
			ctx->shader_info->vs.writes_viewport_index = true;
			viewport_index_value = values[0];
			continue;
		} else if (i >= VARYING_SLOT_VAR0) {
			ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
			target = V_008DFC_SQ_EXP_PARAM + param_count;
			param_count++;
		}

		si_llvm_init_export_args(ctx, values, target, args);

		if (target >= V_008DFC_SQ_EXP_POS &&
		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
			       args, sizeof(args));
		} else {
			ac_emit_llvm_intrinsic(&ctx->ac,
					       "llvm.SI.export",
					       ctx->voidt,
					       args, 9, 0);
		}
	}

	/* We need to add the position output manually if it's missing. */
	if (!pos_args[0][0]) {
		pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
		pos_args[0][1] = ctx->i32zero; /* EXEC mask */
		pos_args[0][2] = ctx->i32zero; /* last export? */
		pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
		pos_args[0][4] = ctx->i32zero; /* COMPR flag */
		pos_args[0][5] = ctx->f32zero; /* X */
		pos_args[0][6] = ctx->f32zero; /* Y */
		pos_args[0][7] = ctx->f32zero; /* Z */
		pos_args[0][8] = ctx->f32one;  /* W */
	}

	/* POS+1 carries point size / layer / viewport when written
	 * (writemask bits 0x1 / 0x4 / 0x8 respectively). */
	uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) |
			 (ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
			 (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0));
	if (mask) {
		pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* writemask */
		pos_args[1][1] = ctx->i32zero;  /* EXEC mask */
		pos_args[1][2] = ctx->i32zero;  /* last export? */
		pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
		pos_args[1][4] = ctx->i32zero;  /* COMPR flag */
		pos_args[1][5] = ctx->f32zero; /* X */
		pos_args[1][6] = ctx->f32zero; /* Y */
		pos_args[1][7] = ctx->f32zero; /* Z */
		pos_args[1][8] = ctx->f32zero;  /* W */

		if (ctx->shader_info->vs.writes_pointsize == true)
			pos_args[1][5] = psize_value;
		if (ctx->shader_info->vs.writes_layer == true)
			pos_args[1][7] = layer_value;
		if (ctx->shader_info->vs.writes_viewport_index == true)
			pos_args[1][8] = viewport_index_value;
	}
	for (i = 0; i < 4; i++) {
		if (pos_args[i][0])
			num_pos_exports++;
	}

	/* Flush the buffered position exports with contiguous target
	 * numbers; the last one gets the DONE bit (args[2]). */
	pos_idx = 0;
	for (i = 0; i < 4; i++) {
		if (!pos_args[i][0])
			continue;

		/* Specify the target we are exporting */
		pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
		if (pos_idx == num_pos_exports)
			pos_args[i][2] = ctx->i32one;
		ac_emit_llvm_intrinsic(&ctx->ac,
				       "llvm.SI.export",
				       ctx->voidt,
				       pos_args[i], 9, 0);
	}

	ctx->shader_info->vs.pos_exports = num_pos_exports;
	ctx->shader_info->vs.param_exports = param_count;
}
4286
4287static void
4288si_export_mrt_color(struct nir_to_llvm_context *ctx,
4289		    LLVMValueRef *color, unsigned param, bool is_last)
4290{
4291	LLVMValueRef args[9];
4292	/* Export */
4293	si_llvm_init_export_args(ctx, color, param,
4294				 args);
4295
4296	if (is_last) {
4297		args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4298		args[2] = ctx->i32one; /* DONE bit */
4299	} else if (args[0] == ctx->i32zero)
4300		return; /* unnecessary NULL export */
4301
4302	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4303			    ctx->voidt, args, 9, 0);
4304}
4305
4306static void
4307si_export_mrt_z(struct nir_to_llvm_context *ctx,
4308		LLVMValueRef depth, LLVMValueRef stencil,
4309		LLVMValueRef samplemask)
4310{
4311	LLVMValueRef args[9];
4312	unsigned mask = 0;
4313	args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4314	args[2] = ctx->i32one; /* DONE bit */
4315	/* Specify the target we are exporting */
4316	args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4317
4318	args[4] = ctx->i32zero; /* COMP flag */
4319	args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4320	args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4321	args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4322	args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4323
4324	if (depth) {
4325		args[5] = depth;
4326		mask |= 0x1;
4327	}
4328
4329	if (stencil) {
4330		args[6] = stencil;
4331		mask |= 0x2;
4332	}
4333
4334	if (samplemask) {
4335		args[7] = samplemask;
4336		mask |= 0x04;
4337	}
4338
4339	/* SI (except OLAND) has a bug that it only looks
4340	 * at the X writemask component. */
4341	if (ctx->options->chip_class == SI &&
4342	    ctx->options->family != CHIP_OLAND)
4343		mask |= 0x01;
4344
4345	args[0] = LLVMConstInt(ctx->i32, mask, false);
4346	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4347			    ctx->voidt, args, 9, 0);
4348}
4349
/* Emit all fragment-shader exports: color outputs to MRT targets and,
 * if written, depth/stencil to MRTZ.  Also fills in the FS info bits
 * (writes_z, writes_stencil, output_mask). */
static void
handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
{
	unsigned index = 0;
	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;

	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
		LLVMValueRef values[4];

		if (!(ctx->output_mask & (1ull << i)))
			continue;

		if (i == FRAG_RESULT_DEPTH) {
			ctx->shader_info->fs.writes_z = true;
			depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
							    ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
		} else if (i == FRAG_RESULT_STENCIL) {
			ctx->shader_info->fs.writes_stencil = true;
			stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
							      ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
		} else {
			bool last = false;
			for (unsigned j = 0; j < 4; j++)
				values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
									ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));

			/* Only mark a color export as the last one when no
			 * depth/stencil export will follow and no higher
			 * output slot remains in the mask. */
			if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
				last = ctx->output_mask <= ((1ull << (i + 1)) - 1);

			si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
			index++;
		}
	}

	/* The hardware requires at least one export; emit a null export
	 * when nothing else was written. */
	if (depth || stencil)
		si_export_mrt_z(ctx, depth, stencil, samplemask);
	else if (!index)
		si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);

	ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
}
4391
4392static void
4393handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
4394{
4395	switch (ctx->stage) {
4396	case MESA_SHADER_VERTEX:
4397		handle_vs_outputs_post(ctx);
4398		break;
4399	case MESA_SHADER_FRAGMENT:
4400		handle_fs_outputs_post(ctx);
4401		break;
4402	default:
4403		break;
4404	}
4405}
4406
4407static void
4408handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4409			  struct nir_variable *variable, uint32_t *offset, int idx)
4410{
4411	unsigned size = glsl_count_attribute_slots(variable->type, false);
4412	variable->data.driver_location = *offset;
4413	*offset += size;
4414}
4415
/* Run a small per-function optimization pipeline over the generated
 * main function and release the builder.
 *
 * mem2reg runs first so the alloca/load/store pattern produced by
 * translation is promoted to SSA before the other passes see it. */
static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
{
	LLVMPassManagerRef passmgr;
	/* Create the pass manager */
	passmgr = LLVMCreateFunctionPassManagerForModule(
							ctx->module);

	/* This pass should eliminate all the load and store instructions */
	LLVMAddPromoteMemoryToRegisterPass(passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(passmgr);
	LLVMAddLICMPass(passmgr);
	LLVMAddAggressiveDCEPass(passmgr);
	LLVMAddCFGSimplificationPass(passmgr);
	LLVMAddInstructionCombiningPass(passmgr);

	/* Run the pass */
	LLVMInitializeFunctionPassManager(passmgr);
	LLVMRunFunctionPassManager(passmgr, ctx->main_function);
	LLVMFinalizeFunctionPassManager(passmgr);

	/* The builder is no longer needed once the IR is final. */
	LLVMDisposeBuilder(ctx->builder);
	LLVMDisposePassManager(passmgr);
}
4441
/* Translate a NIR shader into a fresh LLVM module.
 *
 * Sets up the LLVM context/module/builder, declares the main function
 * with its SGPR/VGPR arguments (create_function), processes input,
 * output and compute-shared variable declarations, translates the
 * shader body, emits the stage's exports, and runs the per-function
 * optimization pipeline.  Also fills in *shader_info as a side effect.
 * Returns the module; the caller owns it (and the LLVM context it
 * references). */
static
LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
                                       struct nir_shader *nir,
                                       struct ac_shader_variant_info *shader_info,
                                       const struct ac_nir_compiler_options *options)
{
	struct nir_to_llvm_context ctx = {0};
	struct nir_function *func;
	unsigned i;
	ctx.options = options;
	ctx.shader_info = shader_info;
	ctx.context = LLVMContextCreate();
	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);

	ac_llvm_context_init(&ctx.ac, ctx.context);
	ctx.ac.module = ctx.module;

	ctx.has_ds_bpermute = ctx.options->chip_class >= VI;

	memset(shader_info, 0, sizeof(*shader_info));

	LLVMSetTarget(ctx.module, "amdgcn--");

	/* Copy the target machine's data layout into the module. */
	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
	LLVMSetDataLayout(ctx.module, data_layout_str);
	LLVMDisposeTargetData(data_layout);
	LLVMDisposeMessage(data_layout_str);

	setup_types(&ctx);

	ctx.builder = LLVMCreateBuilderInContext(ctx.context);
	ctx.ac.builder = ctx.builder;
	ctx.stage = nir->stage;

	/* -1 marks a user-SGPR location as unused. */
	for (i = 0; i < AC_UD_MAX_SETS; i++)
		shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
	for (i = 0; i < AC_UD_MAX_UD; i++)
		shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;

	create_function(&ctx);

	if (nir->stage == MESA_SHADER_COMPUTE) {
		/* Lay out compute shared variables in one LDS array. */
		int num_shared = 0;
		nir_foreach_variable(variable, &nir->shared)
			num_shared++;
		if (num_shared) {
			int idx = 0;
			uint32_t shared_size = 0;
			LLVMValueRef var;
			LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
			nir_foreach_variable(variable, &nir->shared) {
				handle_shared_compute_var(&ctx, variable, &shared_size, idx);
				idx++;
			}

			/* 16 bytes per attribute slot counted above. */
			shared_size *= 16;
			var = LLVMAddGlobalInAddressSpace(ctx.module,
							  LLVMArrayType(ctx.i8, shared_size),
							  "compute_lds",
							  LOCAL_ADDR_SPACE);
			LLVMSetAlignment(var, 4);
			ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
		}
	}

	nir_foreach_variable(variable, &nir->inputs)
		handle_shader_input_decl(&ctx, variable);

	if (nir->stage == MESA_SHADER_FRAGMENT)
		handle_fs_inputs_pre(&ctx, nir);

	nir_foreach_variable(variable, &nir->outputs)
		handle_shader_output_decl(&ctx, variable);

	ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
	                                   _mesa_key_pointer_equal);
	ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
	                                   _mesa_key_pointer_equal);

	/* Only the first (entry) function is translated. */
	func = (struct nir_function *)exec_list_get_head(&nir->functions);

	setup_locals(&ctx, func);

	visit_cf_list(&ctx, &func->impl->body);
	phi_post_pass(&ctx);

	handle_shader_outputs_post(&ctx);
	LLVMBuildRetVoid(ctx.builder);

	ac_llvm_finalize_module(&ctx);
	free(ctx.locals);
	ralloc_free(ctx.defs);
	ralloc_free(ctx.phis);

	return ctx.module;
}
4539
4540static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
4541{
4542	unsigned *retval = (unsigned *)context;
4543	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
4544	char *description = LLVMGetDiagInfoDescription(di);
4545
4546	if (severity == LLVMDSError) {
4547		*retval = 1;
4548		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
4549		        description);
4550	}
4551
4552	LLVMDisposeMessage(description);
4553}
4554
4555static unsigned ac_llvm_compile(LLVMModuleRef M,
4556                                struct ac_shader_binary *binary,
4557                                LLVMTargetMachineRef tm)
4558{
4559	unsigned retval = 0;
4560	char *err;
4561	LLVMContextRef llvm_ctx;
4562	LLVMMemoryBufferRef out_buffer;
4563	unsigned buffer_size;
4564	const char *buffer_data;
4565	LLVMBool mem_err;
4566
4567	/* Setup Diagnostic Handler*/
4568	llvm_ctx = LLVMGetModuleContext(M);
4569
4570	LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
4571	                                &retval);
4572
4573	/* Compile IR*/
4574	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
4575	                                              &err, &out_buffer);
4576
4577	/* Process Errors/Warnings */
4578	if (mem_err) {
4579		fprintf(stderr, "%s: %s", __FUNCTION__, err);
4580		free(err);
4581		retval = 1;
4582		goto out;
4583	}
4584
4585	/* Extract Shader Code*/
4586	buffer_size = LLVMGetBufferSize(out_buffer);
4587	buffer_data = LLVMGetBufferStart(out_buffer);
4588
4589	ac_elf_read(buffer_data, buffer_size, binary);
4590
4591	/* Clean up */
4592	LLVMDisposeMemoryBuffer(out_buffer);
4593
4594out:
4595	return retval;
4596}
4597
4598static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
4599				   LLVMModuleRef llvm_module,
4600				   struct ac_shader_binary *binary,
4601				   struct ac_shader_config *config,
4602				   struct ac_shader_variant_info *shader_info,
4603				   gl_shader_stage stage,
4604				   bool dump_shader)
4605{
4606	if (dump_shader)
4607		LLVMDumpModule(llvm_module);
4608
4609	memset(binary, 0, sizeof(*binary));
4610	int v = ac_llvm_compile(llvm_module, binary, tm);
4611	if (v) {
4612		fprintf(stderr, "compile failed\n");
4613	}
4614
4615	if (dump_shader)
4616		fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
4617
4618	ac_shader_binary_read_config(binary, config, 0);
4619
4620	LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4621	LLVMDisposeModule(llvm_module);
4622	LLVMContextDispose(ctx);
4623
4624	if (stage == MESA_SHADER_FRAGMENT) {
4625		shader_info->num_input_vgprs = 0;
4626		if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
4627			shader_info->num_input_vgprs += 2;
4628		if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
4629			shader_info->num_input_vgprs += 2;
4630		if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
4631			shader_info->num_input_vgprs += 2;
4632		if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
4633			shader_info->num_input_vgprs += 3;
4634		if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
4635			shader_info->num_input_vgprs += 2;
4636		if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
4637			shader_info->num_input_vgprs += 2;
4638		if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
4639			shader_info->num_input_vgprs += 2;
4640		if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
4641			shader_info->num_input_vgprs += 1;
4642		if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
4643			shader_info->num_input_vgprs += 1;
4644		if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
4645			shader_info->num_input_vgprs += 1;
4646		if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
4647			shader_info->num_input_vgprs += 1;
4648		if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
4649			shader_info->num_input_vgprs += 1;
4650		if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
4651			shader_info->num_input_vgprs += 1;
4652		if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
4653			shader_info->num_input_vgprs += 1;
4654		if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
4655			shader_info->num_input_vgprs += 1;
4656		if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
4657			shader_info->num_input_vgprs += 1;
4658	}
4659	config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
4660
4661	/* +3 for scratch wave offset and VCC */
4662	config->num_sgprs = MAX2(config->num_sgprs,
4663	                         shader_info->num_input_sgprs + 3);
4664}
4665
4666void ac_compile_nir_shader(LLVMTargetMachineRef tm,
4667                           struct ac_shader_binary *binary,
4668                           struct ac_shader_config *config,
4669                           struct ac_shader_variant_info *shader_info,
4670                           struct nir_shader *nir,
4671                           const struct ac_nir_compiler_options *options,
4672			   bool dump_shader)
4673{
4674
4675	LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
4676	                                                     options);
4677
4678	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader);
4679	switch (nir->stage) {
4680	case MESA_SHADER_COMPUTE:
4681		for (int i = 0; i < 3; ++i)
4682			shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
4683		break;
4684	case MESA_SHADER_FRAGMENT:
4685		shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
4686		break;
4687	default:
4688		break;
4689	}
4690}
4691