1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#include "si_shader_internal.h"
25#include "gallivm/lp_bld_const.h"
26#include "gallivm/lp_bld_intr.h"
27#include "gallivm/lp_bld_gather.h"
28#include "tgsi/tgsi_parse.h"
29
30static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
31			       struct lp_build_emit_data *emit_data)
32{
33	const struct tgsi_full_instruction *inst = emit_data->inst;
34	struct gallivm_state *gallivm = bld_base->base.gallivm;
35	LLVMBuilderRef builder = gallivm->builder;
36	unsigned i;
37	LLVMValueRef conds[TGSI_NUM_CHANNELS];
38
39	for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
40		LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
41		conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
42					bld_base->base.zero, "");
43	}
44
45	/* Or the conditions together */
46	for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
47		conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
48	}
49
50	emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
51	emit_data->arg_count = 1;
52	emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
53					lp_build_const_float(gallivm, -1.0f),
54					bld_base->base.zero, "");
55}
56
57static void kil_emit(const struct lp_build_tgsi_action *action,
58		     struct lp_build_tgsi_context *bld_base,
59		     struct lp_build_emit_data *emit_data)
60{
61	unsigned i;
62	for (i = 0; i < emit_data->arg_count; i++) {
63		emit_data->output[i] = lp_build_intrinsic_unary(
64			bld_base->base.gallivm->builder,
65			action->intr_name,
66			emit_data->dst_type, emit_data->args[i]);
67	}
68}
69
70static void emit_icmp(const struct lp_build_tgsi_action *action,
71		      struct lp_build_tgsi_context *bld_base,
72		      struct lp_build_emit_data *emit_data)
73{
74	unsigned pred;
75	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
76	LLVMContextRef context = bld_base->base.gallivm->context;
77
78	switch (emit_data->inst->Instruction.Opcode) {
79	case TGSI_OPCODE_USEQ:
80	case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
81	case TGSI_OPCODE_USNE:
82	case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
83	case TGSI_OPCODE_USGE:
84	case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
85	case TGSI_OPCODE_USLT:
86	case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
87	case TGSI_OPCODE_ISGE:
88	case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
89	case TGSI_OPCODE_ISLT:
90	case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
91	default:
92		assert(!"unknown instruction");
93		pred = 0;
94		break;
95	}
96
97	LLVMValueRef v = LLVMBuildICmp(builder, pred,
98			emit_data->args[0], emit_data->args[1],"");
99
100	v = LLVMBuildSExtOrBitCast(builder, v,
101			LLVMInt32TypeInContext(context), "");
102
103	emit_data->output[emit_data->chan] = v;
104}
105
106static void emit_ucmp(const struct lp_build_tgsi_action *action,
107		      struct lp_build_tgsi_context *bld_base,
108		      struct lp_build_emit_data *emit_data)
109{
110	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
111
112	LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
113					     bld_base->uint_bld.elem_type, "");
114
115	LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
116				       bld_base->uint_bld.zero, "");
117
118	emit_data->output[emit_data->chan] =
119		LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
120}
121
122static void emit_cmp(const struct lp_build_tgsi_action *action,
123		     struct lp_build_tgsi_context *bld_base,
124		     struct lp_build_emit_data *emit_data)
125{
126	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
127	LLVMValueRef cond, *args = emit_data->args;
128
129	cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
130			     bld_base->base.zero, "");
131
132	emit_data->output[emit_data->chan] =
133		LLVMBuildSelect(builder, cond, args[1], args[2], "");
134}
135
136static void emit_set_cond(const struct lp_build_tgsi_action *action,
137			  struct lp_build_tgsi_context *bld_base,
138			  struct lp_build_emit_data *emit_data)
139{
140	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
141	LLVMRealPredicate pred;
142	LLVMValueRef cond;
143
144	/* Use ordered for everything but NE (which is usual for
145	 * float comparisons)
146	 */
147	switch (emit_data->inst->Instruction.Opcode) {
148	case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
149	case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
150	case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
151	case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
152	case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
153	case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
154	default: assert(!"unknown instruction"); pred = 0; break;
155	}
156
157	cond = LLVMBuildFCmp(builder,
158		pred, emit_data->args[0], emit_data->args[1], "");
159
160	emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
161		cond, bld_base->base.one, bld_base->base.zero, "");
162}
163
164static void emit_fcmp(const struct lp_build_tgsi_action *action,
165		      struct lp_build_tgsi_context *bld_base,
166		      struct lp_build_emit_data *emit_data)
167{
168	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
169	LLVMContextRef context = bld_base->base.gallivm->context;
170	LLVMRealPredicate pred;
171
172	/* Use ordered for everything but NE (which is usual for
173	 * float comparisons)
174	 */
175	switch (emit_data->inst->Instruction.Opcode) {
176	case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
177	case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
178	case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
179	case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
180	default: assert(!"unknown instruction"); pred = 0; break;
181	}
182
183	LLVMValueRef v = LLVMBuildFCmp(builder, pred,
184			emit_data->args[0], emit_data->args[1],"");
185
186	v = LLVMBuildSExtOrBitCast(builder, v,
187			LLVMInt32TypeInContext(context), "");
188
189	emit_data->output[emit_data->chan] = v;
190}
191
192static void emit_dcmp(const struct lp_build_tgsi_action *action,
193		      struct lp_build_tgsi_context *bld_base,
194		      struct lp_build_emit_data *emit_data)
195{
196	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
197	LLVMContextRef context = bld_base->base.gallivm->context;
198	LLVMRealPredicate pred;
199
200	/* Use ordered for everything but NE (which is usual for
201	 * float comparisons)
202	 */
203	switch (emit_data->inst->Instruction.Opcode) {
204	case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
205	case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
206	case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
207	case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
208	default: assert(!"unknown instruction"); pred = 0; break;
209	}
210
211	LLVMValueRef v = LLVMBuildFCmp(builder, pred,
212			emit_data->args[0], emit_data->args[1],"");
213
214	v = LLVMBuildSExtOrBitCast(builder, v,
215			LLVMInt32TypeInContext(context), "");
216
217	emit_data->output[emit_data->chan] = v;
218}
219
220static void emit_not(const struct lp_build_tgsi_action *action,
221		     struct lp_build_tgsi_context *bld_base,
222		     struct lp_build_emit_data *emit_data)
223{
224	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
225	LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
226			emit_data->args[0]);
227	emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
228}
229
230static void emit_arl(const struct lp_build_tgsi_action *action,
231		     struct lp_build_tgsi_context *bld_base,
232		     struct lp_build_emit_data *emit_data)
233{
234	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
235	LLVMValueRef floor_index =  lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
236	emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
237			floor_index, bld_base->base.int_elem_type , "");
238}
239
240static void emit_and(const struct lp_build_tgsi_action *action,
241		     struct lp_build_tgsi_context *bld_base,
242		     struct lp_build_emit_data *emit_data)
243{
244	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
245	emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
246			emit_data->args[0], emit_data->args[1], "");
247}
248
249static void emit_or(const struct lp_build_tgsi_action *action,
250		    struct lp_build_tgsi_context *bld_base,
251		    struct lp_build_emit_data *emit_data)
252{
253	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
254	emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
255			emit_data->args[0], emit_data->args[1], "");
256}
257
258static void emit_uadd(const struct lp_build_tgsi_action *action,
259		      struct lp_build_tgsi_context *bld_base,
260		      struct lp_build_emit_data *emit_data)
261{
262	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
263	emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
264			emit_data->args[0], emit_data->args[1], "");
265}
266
267static void emit_udiv(const struct lp_build_tgsi_action *action,
268		      struct lp_build_tgsi_context *bld_base,
269		      struct lp_build_emit_data *emit_data)
270{
271	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
272	emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
273			emit_data->args[0], emit_data->args[1], "");
274}
275
276static void emit_idiv(const struct lp_build_tgsi_action *action,
277		      struct lp_build_tgsi_context *bld_base,
278		      struct lp_build_emit_data *emit_data)
279{
280	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
281	emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
282			emit_data->args[0], emit_data->args[1], "");
283}
284
285static void emit_mod(const struct lp_build_tgsi_action *action,
286		     struct lp_build_tgsi_context *bld_base,
287		     struct lp_build_emit_data *emit_data)
288{
289	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
290	emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
291			emit_data->args[0], emit_data->args[1], "");
292}
293
294static void emit_umod(const struct lp_build_tgsi_action *action,
295		      struct lp_build_tgsi_context *bld_base,
296		      struct lp_build_emit_data *emit_data)
297{
298	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
299	emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
300			emit_data->args[0], emit_data->args[1], "");
301}
302
303static void emit_shl(const struct lp_build_tgsi_action *action,
304		     struct lp_build_tgsi_context *bld_base,
305		     struct lp_build_emit_data *emit_data)
306{
307	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
308	emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
309			emit_data->args[0], emit_data->args[1], "");
310}
311
312static void emit_ushr(const struct lp_build_tgsi_action *action,
313		      struct lp_build_tgsi_context *bld_base,
314		      struct lp_build_emit_data *emit_data)
315{
316	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
317	emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
318			emit_data->args[0], emit_data->args[1], "");
319}
320static void emit_ishr(const struct lp_build_tgsi_action *action,
321		      struct lp_build_tgsi_context *bld_base,
322		      struct lp_build_emit_data *emit_data)
323{
324	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
325	emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
326			emit_data->args[0], emit_data->args[1], "");
327}
328
329static void emit_xor(const struct lp_build_tgsi_action *action,
330		     struct lp_build_tgsi_context *bld_base,
331		     struct lp_build_emit_data *emit_data)
332{
333	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
334	emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
335			emit_data->args[0], emit_data->args[1], "");
336}
337
338static void emit_ssg(const struct lp_build_tgsi_action *action,
339		     struct lp_build_tgsi_context *bld_base,
340		     struct lp_build_emit_data *emit_data)
341{
342	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
343
344	LLVMValueRef cmp, val;
345
346	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
347		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
348		val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
349		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
350		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
351	} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
352		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
353		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
354		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
355		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
356	} else { // float SSG
357		cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
358		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
359		cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
360		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
361	}
362
363	emit_data->output[emit_data->chan] = val;
364}
365
366static void emit_ineg(const struct lp_build_tgsi_action *action,
367		      struct lp_build_tgsi_context *bld_base,
368		      struct lp_build_emit_data *emit_data)
369{
370	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
371	emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
372			emit_data->args[0], "");
373}
374
375static void emit_dneg(const struct lp_build_tgsi_action *action,
376		      struct lp_build_tgsi_context *bld_base,
377		      struct lp_build_emit_data *emit_data)
378{
379	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
380	emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
381			emit_data->args[0], "");
382}
383
384static void emit_frac(const struct lp_build_tgsi_action *action,
385		      struct lp_build_tgsi_context *bld_base,
386		      struct lp_build_emit_data *emit_data)
387{
388	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
389	char *intr;
390
391	if (emit_data->info->opcode == TGSI_OPCODE_FRC)
392		intr = "llvm.floor.f32";
393	else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
394		intr = "llvm.floor.f64";
395	else {
396		assert(0);
397		return;
398	}
399
400	LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
401						&emit_data->args[0], 1,
402						LP_FUNC_ATTR_READNONE);
403	emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
404			emit_data->args[0], floor, "");
405}
406
407static void emit_f2i(const struct lp_build_tgsi_action *action,
408		     struct lp_build_tgsi_context *bld_base,
409		     struct lp_build_emit_data *emit_data)
410{
411	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
412	emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
413			emit_data->args[0], bld_base->int_bld.elem_type, "");
414}
415
416static void emit_f2u(const struct lp_build_tgsi_action *action,
417		     struct lp_build_tgsi_context *bld_base,
418		     struct lp_build_emit_data *emit_data)
419{
420	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
421	emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
422			emit_data->args[0], bld_base->uint_bld.elem_type, "");
423}
424
425static void emit_i2f(const struct lp_build_tgsi_action *action,
426		     struct lp_build_tgsi_context *bld_base,
427		     struct lp_build_emit_data *emit_data)
428{
429	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
430	emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
431			emit_data->args[0], bld_base->base.elem_type, "");
432}
433
434static void emit_u2f(const struct lp_build_tgsi_action *action,
435		     struct lp_build_tgsi_context *bld_base,
436		     struct lp_build_emit_data *emit_data)
437{
438	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
439	emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
440			emit_data->args[0], bld_base->base.elem_type, "");
441}
442
443static void
444build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
445			   struct lp_build_tgsi_context *bld_base,
446			   struct lp_build_emit_data *emit_data)
447{
448	struct lp_build_context *base = &bld_base->base;
449	emit_data->output[emit_data->chan] =
450		lp_build_intrinsic(base->gallivm->builder, action->intr_name,
451				   emit_data->dst_type, emit_data->args,
452				   emit_data->arg_count, LP_FUNC_ATTR_READNONE);
453}
454
455static void emit_bfi(const struct lp_build_tgsi_action *action,
456		     struct lp_build_tgsi_context *bld_base,
457		     struct lp_build_emit_data *emit_data)
458{
459	struct gallivm_state *gallivm = bld_base->base.gallivm;
460	LLVMBuilderRef builder = gallivm->builder;
461	LLVMValueRef bfi_args[3];
462	LLVMValueRef bfi_sm5;
463	LLVMValueRef cond;
464
465	// Calculate the bitmask: (((1 << src3) - 1) << src2
466	bfi_args[0] = LLVMBuildShl(builder,
467				   LLVMBuildSub(builder,
468						LLVMBuildShl(builder,
469							     bld_base->int_bld.one,
470							     emit_data->args[3], ""),
471						bld_base->int_bld.one, ""),
472				   emit_data->args[2], "");
473
474	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
475				   emit_data->args[2], "");
476
477	bfi_args[2] = emit_data->args[0];
478
479	/* Calculate:
480	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
481	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
482	 */
483	bfi_sm5 =
484		LLVMBuildXor(builder, bfi_args[2],
485			LLVMBuildAnd(builder, bfi_args[0],
486				LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
487					     ""), ""), "");
488
489	/* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
490	 * uses the convenient V_BFI lowering for the above, which follows SM5
491	 * and disagrees with GLSL semantics when bits (src3) is 32.
492	 */
493	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
494			     lp_build_const_int32(gallivm, 32), "");
495	emit_data->output[emit_data->chan] =
496		LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
497}
498
499static void emit_bfe(const struct lp_build_tgsi_action *action,
500		     struct lp_build_tgsi_context *bld_base,
501		     struct lp_build_emit_data *emit_data)
502{
503	struct gallivm_state *gallivm = bld_base->base.gallivm;
504	LLVMBuilderRef builder = gallivm->builder;
505	LLVMValueRef bfe_sm5;
506	LLVMValueRef cond;
507
508	bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
509				     emit_data->dst_type, emit_data->args,
510				     emit_data->arg_count, LP_FUNC_ATTR_READNONE);
511
512	/* Correct for GLSL semantics. */
513	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
514			     lp_build_const_int32(gallivm, 32), "");
515	emit_data->output[emit_data->chan] =
516		LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
517}
518
519/* this is ffs in C */
520static void emit_lsb(const struct lp_build_tgsi_action *action,
521		     struct lp_build_tgsi_context *bld_base,
522		     struct lp_build_emit_data *emit_data)
523{
524	struct gallivm_state *gallivm = bld_base->base.gallivm;
525	LLVMBuilderRef builder = gallivm->builder;
526	LLVMValueRef args[2] = {
527		emit_data->args[0],
528
529		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
530		 * add special code to check for x=0. The reason is that
531		 * the LLVM behavior for x=0 is different from what we
532		 * need here. However, LLVM also assumes that ffs(x) is
533		 * in [0, 31], but GLSL expects that ffs(0) = -1, so
534		 * a conditional assignment to handle 0 is still required.
535		 */
536		LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
537	};
538
539	LLVMValueRef lsb =
540		lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
541				emit_data->dst_type, args, ARRAY_SIZE(args),
542				LP_FUNC_ATTR_READNONE);
543
544	/* TODO: We need an intrinsic to skip this conditional. */
545	/* Check for zero: */
546	emit_data->output[emit_data->chan] =
547		LLVMBuildSelect(builder,
548				LLVMBuildICmp(builder, LLVMIntEQ, args[0],
549					      bld_base->uint_bld.zero, ""),
550				lp_build_const_int32(gallivm, -1), lsb, "");
551}
552
553/* Find the last bit set. */
554static void emit_umsb(const struct lp_build_tgsi_action *action,
555		      struct lp_build_tgsi_context *bld_base,
556		      struct lp_build_emit_data *emit_data)
557{
558	struct gallivm_state *gallivm = bld_base->base.gallivm;
559	LLVMBuilderRef builder = gallivm->builder;
560	LLVMValueRef args[2] = {
561		emit_data->args[0],
562		/* Don't generate code for handling zero: */
563		LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
564	};
565
566	LLVMValueRef msb =
567		lp_build_intrinsic(builder, "llvm.ctlz.i32",
568				emit_data->dst_type, args, ARRAY_SIZE(args),
569				LP_FUNC_ATTR_READNONE);
570
571	/* The HW returns the last bit index from MSB, but TGSI wants
572	 * the index from LSB. Invert it by doing "31 - msb". */
573	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
574			   msb, "");
575
576	/* Check for zero: */
577	emit_data->output[emit_data->chan] =
578		LLVMBuildSelect(builder,
579				LLVMBuildICmp(builder, LLVMIntEQ, args[0],
580					      bld_base->uint_bld.zero, ""),
581				lp_build_const_int32(gallivm, -1), msb, "");
582}
583
584/* Find the last bit opposite of the sign bit. */
585static void emit_imsb(const struct lp_build_tgsi_action *action,
586		      struct lp_build_tgsi_context *bld_base,
587		      struct lp_build_emit_data *emit_data)
588{
589	struct gallivm_state *gallivm = bld_base->base.gallivm;
590	LLVMBuilderRef builder = gallivm->builder;
591	LLVMValueRef arg = emit_data->args[0];
592
593	LLVMValueRef msb =
594		lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
595				emit_data->dst_type, &arg, 1,
596				LP_FUNC_ATTR_READNONE);
597
598	/* The HW returns the last bit index from MSB, but TGSI wants
599	 * the index from LSB. Invert it by doing "31 - msb". */
600	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
601			   msb, "");
602
603	/* If arg == 0 || arg == -1 (0xffffffff), return -1. */
604	LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
605
606	LLVMValueRef cond =
607		LLVMBuildOr(builder,
608			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
609					  bld_base->uint_bld.zero, ""),
610			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
611					  all_ones, ""), "");
612
613	emit_data->output[emit_data->chan] =
614		LLVMBuildSelect(builder, cond, all_ones, msb, "");
615}
616
617static void emit_iabs(const struct lp_build_tgsi_action *action,
618		      struct lp_build_tgsi_context *bld_base,
619		      struct lp_build_emit_data *emit_data)
620{
621	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
622
623	emit_data->output[emit_data->chan] =
624		lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
625					  emit_data->args[0],
626					  LLVMBuildNeg(builder,
627						       emit_data->args[0], ""));
628}
629
630static void emit_minmax_int(const struct lp_build_tgsi_action *action,
631			    struct lp_build_tgsi_context *bld_base,
632			    struct lp_build_emit_data *emit_data)
633{
634	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
635	LLVMIntPredicate op;
636
637	switch (emit_data->info->opcode) {
638	default:
639		assert(0);
640	case TGSI_OPCODE_IMAX:
641	case TGSI_OPCODE_I64MAX:
642		op = LLVMIntSGT;
643		break;
644	case TGSI_OPCODE_IMIN:
645	case TGSI_OPCODE_I64MIN:
646		op = LLVMIntSLT;
647		break;
648	case TGSI_OPCODE_UMAX:
649	case TGSI_OPCODE_U64MAX:
650		op = LLVMIntUGT;
651		break;
652	case TGSI_OPCODE_UMIN:
653	case TGSI_OPCODE_U64MIN:
654		op = LLVMIntULT;
655		break;
656	}
657
658	emit_data->output[emit_data->chan] =
659		LLVMBuildSelect(builder,
660				LLVMBuildICmp(builder, op, emit_data->args[0],
661					      emit_data->args[1], ""),
662				emit_data->args[0],
663				emit_data->args[1], "");
664}
665
666static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
667			    struct lp_build_emit_data *emit_data)
668{
669	emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
670						 0, TGSI_CHAN_X);
671	emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
672						 0, TGSI_CHAN_Y);
673}
674
675static void emit_pk2h(const struct lp_build_tgsi_action *action,
676		      struct lp_build_tgsi_context *bld_base,
677		      struct lp_build_emit_data *emit_data)
678{
679	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
680	LLVMContextRef context = bld_base->base.gallivm->context;
681	struct lp_build_context *uint_bld = &bld_base->uint_bld;
682	LLVMTypeRef fp16, i16;
683	LLVMValueRef const16, comp[2];
684	unsigned i;
685
686	fp16 = LLVMHalfTypeInContext(context);
687	i16 = LLVMInt16TypeInContext(context);
688	const16 = lp_build_const_int32(uint_bld->gallivm, 16);
689
690	for (i = 0; i < 2; i++) {
691		comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
692		comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
693		comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
694	}
695
696	comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
697	comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
698
699	emit_data->output[emit_data->chan] = comp[0];
700}
701
702static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
703			    struct lp_build_emit_data *emit_data)
704{
705	emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
706						 0, TGSI_CHAN_X);
707}
708
709static void emit_up2h(const struct lp_build_tgsi_action *action,
710		      struct lp_build_tgsi_context *bld_base,
711		      struct lp_build_emit_data *emit_data)
712{
713	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
714	LLVMContextRef context = bld_base->base.gallivm->context;
715	struct lp_build_context *uint_bld = &bld_base->uint_bld;
716	LLVMTypeRef fp16, i16;
717	LLVMValueRef const16, input, val;
718	unsigned i;
719
720	fp16 = LLVMHalfTypeInContext(context);
721	i16 = LLVMInt16TypeInContext(context);
722	const16 = lp_build_const_int32(uint_bld->gallivm, 16);
723	input = emit_data->args[0];
724
725	for (i = 0; i < 2; i++) {
726		val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
727		val = LLVMBuildTrunc(builder, val, i16, "");
728		val = LLVMBuildBitCast(builder, val, fp16, "");
729		emit_data->output[i] =
730			LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
731	}
732}
733
734static void emit_fdiv(const struct lp_build_tgsi_action *action,
735		      struct lp_build_tgsi_context *bld_base,
736		      struct lp_build_emit_data *emit_data)
737{
738	struct si_shader_context *ctx = si_shader_context(bld_base);
739
740	emit_data->output[emit_data->chan] =
741		LLVMBuildFDiv(bld_base->base.gallivm->builder,
742			      emit_data->args[0], emit_data->args[1], "");
743
744	/* Use v_rcp_f32 instead of precise division. */
745	if (HAVE_LLVM >= 0x0309 &&
746	    !LLVMIsConstant(emit_data->output[emit_data->chan]))
747		LLVMSetMetadata(emit_data->output[emit_data->chan],
748				ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
749}
750
751/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
752 * the target machine. f64 needs global unsafe math flags to get rsq. */
753static void emit_rsq(const struct lp_build_tgsi_action *action,
754		     struct lp_build_tgsi_context *bld_base,
755		     struct lp_build_emit_data *emit_data)
756{
757	LLVMValueRef sqrt =
758		lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
759					 emit_data->args[0]);
760
761	emit_data->output[emit_data->chan] =
762		lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
763					  bld_base->base.one, sqrt);
764}
765
766void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
767{
768	lp_set_default_actions(bld_base);
769
770	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
771	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
772	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
773	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
774	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
775		HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
776	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
777	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
778	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
779	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
780		HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
781	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
782	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
783	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
784	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
785	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
786	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
787	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
788	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
789	bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
790	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
791	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
792	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
793	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
794	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
795	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
796	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name =
797		HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
798	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
799	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
800	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
801	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
802		HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
803	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
804	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
805	bld_base->op_actions[TGSI_OPCODE_FMA].emit =
806		bld_base->op_actions[TGSI_OPCODE_MAD].emit;
807	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
808	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
809	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
810	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
811	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
812	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
813	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
814	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
815	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
816	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
817	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
818	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
819	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
820	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
821	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
822	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
823	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
824	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
825	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
826	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
827	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
828	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
829	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
830	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
831	bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
832	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
833	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
834	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
835	bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
836	bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
837	bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
838	bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
839	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
840	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
841	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
842	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
843	bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
844	bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
845	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
846	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
847	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
848	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
849	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
850	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
851	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
852	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
853	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
854	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
855	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
856	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
857	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
858	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
859	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
860	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
861	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
862	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
863	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
864	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
865	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
866	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
867	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
868	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
869	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
870	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
871	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
872	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
873	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
874	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
875	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
876	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
877	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
878	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
879	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
880	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
881	bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
882	bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
883
884	bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
885	bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
886	bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
887	bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
888	bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
889	bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
890	bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
891
892	bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
893	bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
894	bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
895	bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
896	bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
897	bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
898
899	bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
900	bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
901	bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
902	bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
903
904	bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
905	bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
906	bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
907	bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
908}
909