/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Tom Stellard <thomas.stellard@amd.com>
 *
 */
26#include "radeon_llvm.h"
27
28#include "gallivm/lp_bld_const.h"
29#include "gallivm/lp_bld_gather.h"
30#include "gallivm/lp_bld_flow.h"
31#include "gallivm/lp_bld_init.h"
32#include "gallivm/lp_bld_intr.h"
33#include "gallivm/lp_bld_swizzle.h"
34#include "tgsi/tgsi_info.h"
35#include "tgsi/tgsi_parse.h"
36#include "util/u_math.h"
37#include "util/u_memory.h"
38#include "util/u_debug.h"
39
40#include <llvm-c/Core.h>
41#include <llvm-c/Transforms/Scalar.h>
42
43static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
44{
45	return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
46}
47
48static struct radeon_llvm_branch * get_current_branch(
49	struct radeon_llvm_context * ctx)
50{
51	return ctx->branch_depth > 0 ?
52			ctx->branch + (ctx->branch_depth - 1) : NULL;
53}
54
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	/* Flatten a (register, channel) pair into one SoA slot index;
	 * each TGSI register occupies four consecutive channel slots. */
	return chan + index * 4;
}
59
60static LLVMValueRef emit_swizzle(
61	struct lp_build_tgsi_context * bld_base,
62        LLVMValueRef value,
63	unsigned swizzle_x,
64	unsigned swizzle_y,
65	unsigned swizzle_z,
66	unsigned swizzle_w)
67{
68	LLVMValueRef swizzles[4];
69	LLVMTypeRef i32t =
70		LLVMInt32TypeInContext(bld_base->base.gallivm->context);
71
72	swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
73	swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
74	swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
75	swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
76
77	return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
78		value,
79		LLVMGetUndef(LLVMTypeOf(value)),
80		LLVMConstVector(swizzles, 4), "");
81}
82
83static LLVMValueRef
84emit_array_index(
85	struct lp_build_tgsi_soa_context *bld,
86	const struct tgsi_full_src_register *reg,
87	unsigned swizzle)
88{
89	struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
90
91	LLVMValueRef addr = LLVMBuildLoad(gallivm->builder,
92	bld->addr[reg->Indirect.Index][swizzle], "");
93	LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index);
94	LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, "");
95	LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index,
96	lp_build_const_int32(gallivm, 4), "");
97	LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index,
98	lp_build_const_int32(gallivm, swizzle), "");
99
100	return array_index;
101}
102
103static LLVMValueRef
104emit_fetch_immediate(
105	struct lp_build_tgsi_context *bld_base,
106	const struct tgsi_full_src_register *reg,
107	enum tgsi_opcode_type type,
108	unsigned swizzle)
109{
110	LLVMTypeRef ctype;
111	LLVMContextRef ctx = bld_base->base.gallivm->context;
112
113	switch (type) {
114	case TGSI_TYPE_UNSIGNED:
115	case TGSI_TYPE_SIGNED:
116		ctype = LLVMInt32TypeInContext(ctx);
117		break;
118	case TGSI_TYPE_UNTYPED:
119	case TGSI_TYPE_FLOAT:
120		ctype = LLVMFloatTypeInContext(ctx);
121		break;
122	default:
123		ctype = 0;
124		break;
125	}
126
127	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
128	return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
129}
130
131static LLVMValueRef
132emit_fetch_input(
133	struct lp_build_tgsi_context *bld_base,
134	const struct tgsi_full_src_register *reg,
135	enum tgsi_opcode_type type,
136	unsigned swizzle)
137{
138	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
139	if (swizzle == ~0) {
140		LLVMValueRef values[TGSI_NUM_CHANNELS] = {};
141		unsigned chan;
142		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
143			values[chan] = ctx->inputs[radeon_llvm_reg_index_soa(
144						reg->Register.Index, chan)];
145		}
146		return lp_build_gather_values(bld_base->base.gallivm, values,
147						TGSI_NUM_CHANNELS);
148	} else {
149		return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]);
150	}
151}
152
153static LLVMValueRef
154emit_fetch_temporary(
155	struct lp_build_tgsi_context *bld_base,
156	const struct tgsi_full_src_register *reg,
157	enum tgsi_opcode_type type,
158	unsigned swizzle)
159{
160	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
161	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
162	if (swizzle == ~0) {
163		LLVMValueRef values[TGSI_NUM_CHANNELS] = {};
164		unsigned chan;
165		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
166			values[chan] = emit_fetch_temporary(bld_base, reg, type, chan);
167		}
168		return lp_build_gather_values(bld_base->base.gallivm, values,
169						TGSI_NUM_CHANNELS);
170	}
171
172	if (reg->Register.Indirect) {
173		LLVMValueRef array_index = emit_array_index(bld, reg, swizzle);
174		LLVMValueRef ptr = LLVMBuildGEP(builder, bld->temps_array, &array_index,
175						1, "");
176		return LLVMBuildLoad(builder, ptr, "");
177	} else {
178		LLVMValueRef temp_ptr;
179		temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
180		return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, ""));
181	}
182}
183
184static LLVMValueRef
185emit_fetch_output(
186	struct lp_build_tgsi_context *bld_base,
187	const struct tgsi_full_src_register *reg,
188	enum tgsi_opcode_type type,
189	unsigned swizzle)
190{
191	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
192	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
193	 if (reg->Register.Indirect) {
194		LLVMValueRef array_index = emit_array_index(bld, reg, swizzle);
195		LLVMValueRef ptr = LLVMBuildGEP(builder, bld->outputs_array, &array_index,
196						1, "");
197		return LLVMBuildLoad(builder, ptr, "");
198	} else {
199		LLVMValueRef temp_ptr;
200		temp_ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
201		return LLVMBuildLoad(builder, temp_ptr, "");
202	 }
203}
204
205static void emit_declaration(
206	struct lp_build_tgsi_context * bld_base,
207	const struct tgsi_full_declaration *decl)
208{
209	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
210	switch(decl->Declaration.File) {
211	case TGSI_FILE_ADDRESS:
212	{
213		 unsigned idx;
214		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
215			unsigned chan;
216			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
217				 ctx->soa.addr[idx][chan] = lp_build_alloca(
218					&ctx->gallivm,
219					ctx->soa.bld_base.uint_bld.elem_type, "");
220			}
221		}
222		break;
223	}
224
225	case TGSI_FILE_TEMPORARY:
226		lp_emit_declaration_soa(bld_base, decl);
227		break;
228
229	case TGSI_FILE_INPUT:
230	{
231		unsigned idx;
232		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
233			ctx->load_input(ctx, idx, decl);
234		}
235	}
236	break;
237
238	case TGSI_FILE_SYSTEM_VALUE:
239	{
240		unsigned idx;
241		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
242			ctx->load_system_value(ctx, idx, decl);
243		}
244	}
245	break;
246
247	case TGSI_FILE_OUTPUT:
248	{
249		unsigned idx;
250		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
251			unsigned chan;
252			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
253			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
254				ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
255					ctx->soa.bld_base.base.elem_type, "");
256			}
257		}
258
259		ctx->output_reg_count = MAX2(ctx->output_reg_count,
260							 decl->Range.Last + 1);
261		break;
262	}
263
264	default:
265		break;
266	}
267}
268
/* Store the per-channel results of an instruction into Dst[0], applying
 * the saturate modifier first.  Only OUTPUT and TEMPORARY destinations
 * are handled; other files are silently ignored. */
static void
emit_store(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_instruction * inst,
	const struct tgsi_opcode_info * info,
	LLVMValueRef dst[4])
{
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	struct lp_build_context base = bld->bld_base.base;
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
	LLVMValueRef temp_ptr;
	unsigned chan, chan_index;
	boolean is_vec_store = FALSE;
	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	/* A vector result is scattered into per-channel scalars and then
	 * re-dispatched through this same callback. */
	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
			LLVMValueRef index = lp_build_const_int32(gallivm, chan);
			values[chan]  = LLVMBuildExtractElement(gallivm->builder,
							dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, values);
		return;
	}

	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];

		/* Apply the saturate modifier by emitting a CLAMP with the
		 * matching lower bound (0.0 or -1.0) and upper bound 1.0. */
		if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
			struct lp_build_emit_data clamp_emit_data;

			memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
			clamp_emit_data.arg_count = 3;
			clamp_emit_data.args[0] = value;
			clamp_emit_data.args[2] = base.one;

			switch(inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				clamp_emit_data.args[1] = base.zero;
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				clamp_emit_data.args[1] = LLVMConstReal(
						base.elem_type, -1.0f);
				break;
			default:
				assert(0);
			}
			value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
						&clamp_emit_data);
		}

		/* Resolve the destination alloca for this channel. */
		switch(reg->Register.File) {
		case TGSI_FILE_OUTPUT:
			temp_ptr = bld->outputs[reg->Register.Index][chan_index];
			break;

		case TGSI_FILE_TEMPORARY:
			temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
			break;

		default:
			/* Other register files are not writable here. */
			return;
		}

		/* Storage is float-typed; bitcast integer results before
		 * storing. */
		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);

		LLVMBuildStore(builder, value, temp_ptr);
	}
}
344
345static void bgnloop_emit(
346	const struct lp_build_tgsi_action * action,
347	struct lp_build_tgsi_context * bld_base,
348	struct lp_build_emit_data * emit_data)
349{
350	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
351	struct gallivm_state * gallivm = bld_base->base.gallivm;
352	LLVMBasicBlockRef loop_block;
353	LLVMBasicBlockRef endloop_block;
354	endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
355						ctx->main_fn, "ENDLOOP");
356	loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
357						endloop_block, "LOOP");
358	LLVMBuildBr(gallivm->builder, loop_block);
359	LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
360	ctx->loop_depth++;
361	ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
362	ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
363}
364
365static void brk_emit(
366	const struct lp_build_tgsi_action * action,
367	struct lp_build_tgsi_context * bld_base,
368	struct lp_build_emit_data * emit_data)
369{
370	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
371	struct gallivm_state * gallivm = bld_base->base.gallivm;
372	struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
373
374	LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
375}
376
377static void cont_emit(
378	const struct lp_build_tgsi_action * action,
379	struct lp_build_tgsi_context * bld_base,
380	struct lp_build_emit_data * emit_data)
381{
382	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
383	struct gallivm_state * gallivm = bld_base->base.gallivm;
384	struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
385
386	LLVMBuildBr(gallivm->builder, current_loop->loop_block);
387}
388
/* ELSE: terminate the IF-side blocks with branches to ENDIF (when they
 * are still open) and start emitting into the ELSE block. */
static void else_emit(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);

	/* We need to add a terminator to the current block if the previous
	 * instruction was an ENDIF. Example:
	 * IF
	 *   [code]
	 *   IF
	 *     [code]
	 *   ELSE
	 *    [code]
	 *   ENDIF <--
	 * ELSE<--
	 *   [code]
	 * ENDIF
	 */

	if (current_block != current_branch->if_block) {
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	/* Also close the IF block itself if nothing terminated it yet. */
	if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	current_branch->has_else = 1;
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
}
422
/* ENDIF: make sure the current, ELSE, and IF blocks each end in a branch
 * to the ENDIF block, then continue emitting there and pop the branch
 * frame. */
static void endif_emit(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);

	/* If we have consecutive ENDIF instructions, then the first ENDIF
	 * will not have a terminator, so we need to add one. */
	if (current_block != current_branch->if_block
			&& current_block != current_branch->else_block
			&& !LLVMGetBasicBlockTerminator(current_block)) {

		 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	/* Close an empty (or fall-through) ELSE block. */
	if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}

	/* Close an empty (or fall-through) IF block. */
	if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}

	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
	ctx->branch_depth--;
}
454
455static void endloop_emit(
456	const struct lp_build_tgsi_action * action,
457	struct lp_build_tgsi_context * bld_base,
458	struct lp_build_emit_data * emit_data)
459{
460	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
461	struct gallivm_state * gallivm = bld_base->base.gallivm;
462	struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
463
464	if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
465		 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
466	}
467
468	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
469	ctx->loop_depth--;
470}
471
472static void if_emit(
473	const struct lp_build_tgsi_action * action,
474	struct lp_build_tgsi_context * bld_base,
475	struct lp_build_emit_data * emit_data)
476{
477	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
478	struct gallivm_state * gallivm = bld_base->base.gallivm;
479	LLVMValueRef cond;
480	LLVMBasicBlockRef if_block, else_block, endif_block;
481
482	cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
483	        bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
484			bld_base->int_bld.zero, "");
485
486	endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
487						ctx->main_fn, "ENDIF");
488	if_block = LLVMInsertBasicBlockInContext(gallivm->context,
489						endif_block, "IF");
490	else_block = LLVMInsertBasicBlockInContext(gallivm->context,
491						endif_block, "ELSE");
492	LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
493	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
494
495	ctx->branch_depth++;
496	ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
497	ctx->branch[ctx->branch_depth - 1].if_block = if_block;
498	ctx->branch[ctx->branch_depth - 1].else_block = else_block;
499	ctx->branch[ctx->branch_depth - 1].has_else = 0;
500}
501
502static void kil_emit(
503	const struct lp_build_tgsi_action * action,
504	struct lp_build_tgsi_context * bld_base,
505	struct lp_build_emit_data * emit_data)
506{
507	unsigned i;
508	for (i = 0; i < emit_data->arg_count; i++) {
509		emit_data->output[i] = lp_build_intrinsic_unary(
510			bld_base->base.gallivm->builder,
511			action->intr_name,
512			emit_data->dst_type, emit_data->args[i]);
513	}
514}
515
516
/* Rewrite the coordinate vector in emit_data->args[0] for cube-map
 * sampling: run llvm.AMDGPU.cube to get face coordinates, normalize the
 * s/t values to the [0..2] face range via mad(x, 1/|z|, 1.5), and
 * reassemble the vector.  For shadow cubes, src.w (the comparison value)
 * is left untouched in coords[3]. */
static void emit_prepare_cube_coords(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	boolean shadowcube = (emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE);
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef type = bld_base->base.elem_type;
	LLVMValueRef coords[4];
	LLVMValueRef mad_args[3];
	unsigned i, cnt;

	LLVMValueRef v = build_intrinsic(builder, "llvm.AMDGPU.cube",
			LLVMVectorType(type, 4),
			&emit_data->args[0],1, LLVMReadNoneAttribute);

	/* save src.w for shadow cube */
	cnt = shadowcube ? 3 : 4;

	for (i = 0; i < cnt; ++i) {
		LLVMValueRef idx = lp_build_const_int32(gallivm, i);
		coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
	}

	/* coords[2] = 1 / |major-axis component| */
	coords[2] = build_intrinsic(builder, "llvm.AMDIL.fabs.",
			type, &coords[2], 1, LLVMReadNoneAttribute);
	coords[2] = build_intrinsic(builder, "llvm.AMDGPU.rcp",
			type, &coords[2], 1, LLVMReadNoneAttribute);

	mad_args[1] = coords[2];
	mad_args[2] = LLVMConstReal(type, 1.5);

	/* coords[0] = mad(coords[0], 1/|z|, 1.5) */
	mad_args[0] = coords[0];
	coords[0] = build_intrinsic(builder, "llvm.AMDIL.mad.",
			type, mad_args, 3, LLVMReadNoneAttribute);

	/* coords[1] = mad(coords[1], 1/|z|, 1.5) */
	mad_args[0] = coords[1];
	coords[1] = build_intrinsic(builder, "llvm.AMDIL.mad.",
			type, mad_args, 3, LLVMReadNoneAttribute);

	/* apply yxwy swizzle to coords */
	coords[2] = coords[3];
	coords[3] = coords[1];
	coords[1] = coords[0];
	coords[0] = coords[3];

	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
						coords, 4);
}
566
567static void txd_fetch_args(
568	struct lp_build_tgsi_context * bld_base,
569	struct lp_build_emit_data * emit_data)
570{
571	const struct tgsi_full_instruction * inst = emit_data->inst;
572
573	LLVMValueRef coords[4];
574	unsigned chan, src;
575	for (src = 0; src < 3; src++) {
576		for (chan = 0; chan < 4; chan++)
577			coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
578
579		emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
580				coords, 4);
581	}
582	emit_data->arg_count = 3;
583	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
584}
585
586
587static void txp_fetch_args(
588	struct lp_build_tgsi_context * bld_base,
589	struct lp_build_emit_data * emit_data)
590{
591	const struct tgsi_full_instruction * inst = emit_data->inst;
592	LLVMValueRef src_w;
593	unsigned chan;
594	LLVMValueRef coords[4];
595
596	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
597	src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
598
599	for (chan = 0; chan < 3; chan++ ) {
600		LLVMValueRef arg = lp_build_emit_fetch(bld_base,
601						emit_data->inst, 0, chan);
602		coords[chan] = lp_build_emit_llvm_binary(bld_base,
603					TGSI_OPCODE_DIV, arg, src_w);
604	}
605	coords[3] = bld_base->base.one;
606	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
607						coords, 4);
608	emit_data->arg_count = 1;
609
610	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
611	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
612	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
613		emit_prepare_cube_coords(bld_base, emit_data);
614	}
615}
616
617static void tex_fetch_args(
618	struct lp_build_tgsi_context * bld_base,
619	struct lp_build_emit_data * emit_data)
620{
621	/* XXX: lp_build_swizzle_aos() was failing with wrong arg types,
622	 * when we used CHAN_ALL.  We should be able to get this to work,
623	 * but for now we will swizzle it ourselves
624	emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
625						 0, CHAN_ALL);
626
627	*/
628
629	const struct tgsi_full_instruction * inst = emit_data->inst;
630
631	LLVMValueRef coords[4];
632	unsigned chan;
633	for (chan = 0; chan < 4; chan++) {
634		coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
635	}
636
637	emit_data->arg_count = 1;
638	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
639						coords, 4);
640	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
641
642	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
643	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
644	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
645		emit_prepare_cube_coords(bld_base, emit_data);
646	}
647}
648
649static void txf_fetch_args(
650	struct lp_build_tgsi_context * bld_base,
651	struct lp_build_emit_data * emit_data)
652{
653	const struct tgsi_full_instruction * inst = emit_data->inst;
654	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
655	const struct tgsi_texture_offset * off = inst->TexOffsets;
656	LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
657
658	/* fetch tex coords */
659	tex_fetch_args(bld_base, emit_data);
660
661	/* fetch tex offsets */
662	if (inst->Texture.NumOffsets) {
663		assert(inst->Texture.NumOffsets == 1);
664
665		emit_data->args[1] = LLVMConstBitCast(
666			bld->immediates[off->Index][off->SwizzleX],
667			offset_type);
668		emit_data->args[2] = LLVMConstBitCast(
669			bld->immediates[off->Index][off->SwizzleY],
670			offset_type);
671		emit_data->args[3] = LLVMConstBitCast(
672			bld->immediates[off->Index][off->SwizzleZ],
673			offset_type);
674	} else {
675		emit_data->args[1] = bld_base->int_bld.zero;
676		emit_data->args[2] = bld_base->int_bld.zero;
677		emit_data->args[3] = bld_base->int_bld.zero;
678	}
679
680	emit_data->arg_count = 4;
681}
682
683static void emit_icmp(
684		const struct lp_build_tgsi_action * action,
685		struct lp_build_tgsi_context * bld_base,
686		struct lp_build_emit_data * emit_data)
687{
688	unsigned pred;
689	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
690	LLVMContextRef context = bld_base->base.gallivm->context;
691
692	switch (emit_data->inst->Instruction.Opcode) {
693	case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
694	case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
695	case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
696	case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
697	case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
698	case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
699	default:
700		assert(!"unknown instruction");
701	}
702
703	LLVMValueRef v = LLVMBuildICmp(builder, pred,
704			emit_data->args[0], emit_data->args[1],"");
705
706	v = LLVMBuildSExtOrBitCast(builder, v,
707			LLVMInt32TypeInContext(context), "");
708
709	emit_data->output[emit_data->chan] = v;
710}
711
712static void emit_cmp(
713		const struct lp_build_tgsi_action *action,
714		struct lp_build_tgsi_context * bld_base,
715		struct lp_build_emit_data * emit_data)
716{
717	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
718	LLVMRealPredicate pred;
719	LLVMValueRef cond;
720
721	/* XXX I'm not sure whether to do unordered or ordered comparisons,
722	 * but llvmpipe uses unordered comparisons, so for consistency we use
723	 * unordered.  (The authors of llvmpipe aren't sure about using
724	 * unordered vs ordered comparisons either.
725	 */
726	switch (emit_data->inst->Instruction.Opcode) {
727	case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break;
728	case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break;
729	case TGSI_OPCODE_SLE: pred = LLVMRealULE; break;
730	case TGSI_OPCODE_SLT: pred = LLVMRealULT; break;
731	case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
732	case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break;
733	default: assert(!"unknown instruction");
734	}
735
736	cond = LLVMBuildFCmp(builder,
737		pred, emit_data->args[0], emit_data->args[1], "");
738
739	emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
740		cond, bld_base->base.one, bld_base->base.zero, "");
741}
742
743static void emit_not(
744		const struct lp_build_tgsi_action * action,
745		struct lp_build_tgsi_context * bld_base,
746		struct lp_build_emit_data * emit_data)
747{
748	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
749	LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
750			emit_data->args[0]);
751	emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
752}
753
754static void emit_and(
755		const struct lp_build_tgsi_action * action,
756		struct lp_build_tgsi_context * bld_base,
757		struct lp_build_emit_data * emit_data)
758{
759	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
760	emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
761			emit_data->args[0], emit_data->args[1], "");
762}
763
764static void emit_or(
765		const struct lp_build_tgsi_action * action,
766		struct lp_build_tgsi_context * bld_base,
767		struct lp_build_emit_data * emit_data)
768{
769	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
770	emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
771			emit_data->args[0], emit_data->args[1], "");
772}
773
774static void emit_uadd(
775		const struct lp_build_tgsi_action * action,
776		struct lp_build_tgsi_context * bld_base,
777		struct lp_build_emit_data * emit_data)
778{
779	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
780	emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
781			emit_data->args[0], emit_data->args[1], "");
782}
783
784static void emit_udiv(
785		const struct lp_build_tgsi_action * action,
786		struct lp_build_tgsi_context * bld_base,
787		struct lp_build_emit_data * emit_data)
788{
789	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
790	emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
791			emit_data->args[0], emit_data->args[1], "");
792}
793
794static void emit_idiv(
795		const struct lp_build_tgsi_action * action,
796		struct lp_build_tgsi_context * bld_base,
797		struct lp_build_emit_data * emit_data)
798{
799	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
800	emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
801			emit_data->args[0], emit_data->args[1], "");
802}
803
804static void emit_mod(
805		const struct lp_build_tgsi_action * action,
806		struct lp_build_tgsi_context * bld_base,
807		struct lp_build_emit_data * emit_data)
808{
809	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
810	emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
811			emit_data->args[0], emit_data->args[1], "");
812}
813
814static void emit_umod(
815		const struct lp_build_tgsi_action * action,
816		struct lp_build_tgsi_context * bld_base,
817		struct lp_build_emit_data * emit_data)
818{
819	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
820	emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
821			emit_data->args[0], emit_data->args[1], "");
822}
823
824static void emit_shl(
825		const struct lp_build_tgsi_action * action,
826		struct lp_build_tgsi_context * bld_base,
827		struct lp_build_emit_data * emit_data)
828{
829	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
830	emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
831			emit_data->args[0], emit_data->args[1], "");
832}
833
834static void emit_ushr(
835		const struct lp_build_tgsi_action * action,
836		struct lp_build_tgsi_context * bld_base,
837		struct lp_build_emit_data * emit_data)
838{
839	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
840	emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
841			emit_data->args[0], emit_data->args[1], "");
842}
843static void emit_ishr(
844		const struct lp_build_tgsi_action * action,
845		struct lp_build_tgsi_context * bld_base,
846		struct lp_build_emit_data * emit_data)
847{
848	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
849	emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
850			emit_data->args[0], emit_data->args[1], "");
851}
852
853static void emit_xor(
854		const struct lp_build_tgsi_action * action,
855		struct lp_build_tgsi_context * bld_base,
856		struct lp_build_emit_data * emit_data)
857{
858	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
859	emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
860			emit_data->args[0], emit_data->args[1], "");
861}
862
863static void emit_ssg(
864		const struct lp_build_tgsi_action * action,
865		struct lp_build_tgsi_context * bld_base,
866		struct lp_build_emit_data * emit_data)
867{
868	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
869
870	LLVMValueRef cmp, val;
871
872	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
873		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
874		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
875		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
876		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
877	} else { // float SSG
878		cmp = LLVMBuildFCmp(builder, LLVMRealUGT, emit_data->args[0], bld_base->int_bld.zero, "");
879		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
880		cmp = LLVMBuildFCmp(builder, LLVMRealUGE, val, bld_base->base.zero, "");
881		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
882	}
883
884	emit_data->output[emit_data->chan] = val;
885}
886
887static void emit_ineg(
888		const struct lp_build_tgsi_action * action,
889		struct lp_build_tgsi_context * bld_base,
890		struct lp_build_emit_data * emit_data)
891{
892	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
893	emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
894			emit_data->args[0], "");
895}
896
897static void emit_f2i(
898		const struct lp_build_tgsi_action * action,
899		struct lp_build_tgsi_context * bld_base,
900		struct lp_build_emit_data * emit_data)
901{
902	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
903	emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
904			emit_data->args[0], bld_base->int_bld.elem_type, "");
905}
906
907static void emit_f2u(
908		const struct lp_build_tgsi_action * action,
909		struct lp_build_tgsi_context * bld_base,
910		struct lp_build_emit_data * emit_data)
911{
912	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
913	emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
914			emit_data->args[0], bld_base->uint_bld.elem_type, "");
915}
916
917static void emit_i2f(
918		const struct lp_build_tgsi_action * action,
919		struct lp_build_tgsi_context * bld_base,
920		struct lp_build_emit_data * emit_data)
921{
922	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
923	emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
924			emit_data->args[0], bld_base->base.elem_type, "");
925}
926
927static void emit_u2f(
928		const struct lp_build_tgsi_action * action,
929		struct lp_build_tgsi_context * bld_base,
930		struct lp_build_emit_data * emit_data)
931{
932	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
933	emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
934			emit_data->args[0], bld_base->base.elem_type, "");
935}
936
937static void emit_immediate(struct lp_build_tgsi_context * bld_base,
938		const struct tgsi_full_immediate *imm)
939{
940	unsigned i;
941	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
942
943	for (i = 0; i < 4; ++i) {
944		ctx->soa.immediates[ctx->soa.num_immediates][i] =
945				LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
946	}
947
948	ctx->soa.num_immediates++;
949}
950
951LLVMValueRef
952build_intrinsic(LLVMBuilderRef builder,
953                   const char *name,
954                   LLVMTypeRef ret_type,
955                   LLVMValueRef *args,
956                   unsigned num_args,
957                   LLVMAttribute attr)
958{
959   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
960   LLVMValueRef function;
961
962   function = LLVMGetNamedFunction(module, name);
963   if(!function) {
964      LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
965      unsigned i;
966
967      assert(num_args <= LP_MAX_FUNC_ARGS);
968
969      for(i = 0; i < num_args; ++i) {
970         assert(args[i]);
971         arg_types[i] = LLVMTypeOf(args[i]);
972      }
973
974      function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
975
976      if (attr)
977          LLVMAddFunctionAttr(function, attr);
978   }
979
980   return LLVMBuildCall(builder, function, args, num_args, "");
981}
982
983void
984build_tgsi_intrinsic_nomem(
985 const struct lp_build_tgsi_action * action,
986 struct lp_build_tgsi_context * bld_base,
987 struct lp_build_emit_data * emit_data)
988{
989   struct lp_build_context * base = &bld_base->base;
990   emit_data->output[emit_data->chan] = build_intrinsic(
991               base->gallivm->builder, action->intr_name,
992               emit_data->dst_type, emit_data->args,
993               emit_data->arg_count, LLVMReadNoneAttribute);
994}
995
996void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
997{
998	struct lp_type type;
999	LLVMTypeRef main_fn_type;
1000	LLVMBasicBlockRef main_fn_body;
1001
1002	/* Initialize the gallivm object:
1003	 * We are only using the module, context, and builder fields of this struct.
1004	 * This should be enough for us to be able to pass our gallivm struct to the
1005	 * helper functions in the gallivm module.
1006	 */
1007	memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
1008	memset(&ctx->soa, 0, sizeof(ctx->soa));
1009	ctx->gallivm.context = LLVMContextCreate();
1010	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1011						ctx->gallivm.context);
1012	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1013
1014	/* Setup the module */
1015	main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
1016					 NULL, 0, 0);
1017	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
1018	main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1019			ctx->main_fn, "main_body");
1020	 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1021
1022	ctx->store_output_intr = "llvm.AMDGPU.store.output.";
1023	ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
1024	struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
1025
1026	/* XXX: We need to revisit this.I think the correct way to do this is
1027	 * to use length = 4 here and use the elem_bld for everything. */
1028	type.floating = TRUE;
1029	type.sign = TRUE;
1030	type.width = 32;
1031	type.length = 1;
1032
1033	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1034	lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1035	lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1036
1037	bld_base->soa = 1;
1038	bld_base->emit_store = emit_store;
1039	bld_base->emit_swizzle = emit_swizzle;
1040	bld_base->emit_declaration = emit_declaration;
1041	bld_base->emit_immediate = emit_immediate;
1042
1043	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1044	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1045	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1046	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_output;
1047
1048	/* Allocate outputs */
1049	ctx->soa.outputs = ctx->outputs;
1050
1051	/* XXX: Is there a better way to initialize all this ? */
1052
1053	lp_set_default_actions(bld_base);
1054
1055	bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
1056	bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
1057	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
1058	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
1059	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
1060	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
1061	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
1062	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
1063	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
1064	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
1065	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
1066	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
1067	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
1068	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
1069	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
1070	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
1071	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
1072	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
1073	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
1074	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
1075	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
1076	bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
1077	bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
1078	bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
1079	bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
1080	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
1081	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
1082	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
1083	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
1084	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
1085	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
1086	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
1087	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
1088	bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
1089	bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min.";
1090	bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
1091	bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max.";
1092	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
1093	bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
1094	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
1095	bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
1096	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem;
1097	bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
1098	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem;
1099	bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
1100	bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
1101	bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
1102	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
1103	bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
1104	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
1105	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.posinf.";
1106
1107
1108
1109	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
1110	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs.";
1111	bld_base->op_actions[TGSI_OPCODE_ARL].emit = build_tgsi_intrinsic_nomem;
1112	bld_base->op_actions[TGSI_OPCODE_ARL].intr_name = "llvm.AMDGPU.arl";
1113	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1114	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1115	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1116	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
1117	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
1118	bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
1119	bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
1120	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
1121	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos";
1122	bld_base->op_actions[TGSI_OPCODE_DIV].emit = build_tgsi_intrinsic_nomem;
1123	bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div";
1124	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1125	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1126	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1127	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
1128	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
1129	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
1130	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.AMDGPU.floor";
1131	bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
1132	bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
1133	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1134	bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
1135	bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill";
1136	bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic;
1137	bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp";
1138	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
1139	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.AMDIL.log.";
1140	bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem;
1141	bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp";
1142	bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
1143	bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min.";
1144	bld_base->op_actions[TGSI_OPCODE_MAD].emit = build_tgsi_intrinsic_nomem;
1145	bld_base->op_actions[TGSI_OPCODE_MAD].intr_name = "llvm.AMDIL.mad.";
1146	bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
1147	bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max.";
1148	bld_base->op_actions[TGSI_OPCODE_MUL].emit = build_tgsi_intrinsic_nomem;
1149	bld_base->op_actions[TGSI_OPCODE_MUL].intr_name = "llvm.AMDGPU.mul";
1150	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
1151	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.AMDGPU.pow";
1152	bld_base->op_actions[TGSI_OPCODE_RCP].emit = build_tgsi_intrinsic_nomem;
1153	bld_base->op_actions[TGSI_OPCODE_RCP].intr_name = "llvm.AMDGPU.rcp";
1154	bld_base->op_actions[TGSI_OPCODE_SSG].emit = build_tgsi_intrinsic_nomem;
1155	bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg";
1156	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
1157	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
1158	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
1159	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
1160	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
1161	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
1162	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
1163	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.AMDGPU.sin";
1164	bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
1165	bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
1166	bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
1167	bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
1168	bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
1169	bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
1170	bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
1171	bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
1172	bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
1173	bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
1174	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
1175	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
1176
1177	bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
1178	bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
1179}
1180
/* Finish the module: terminate main() with a void return, run a small
 * per-function optimization pipeline over it, then dispose of the
 * builder and pass manager (the module itself stays alive until
 * radeon_llvm_dispose()).  Note: 'gallivm' points at &ctx->gallivm,
 * so ctx->gallivm.passmgr and gallivm->passmgr are the same field. */
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
	struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
	/* End the main function with Return*/
	LLVMBuildRetVoid(gallivm->builder);

	/* Create the pass manager */
	ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
							gallivm->module);

	/* This pass should eliminate all the load and store instructions */
	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
	LLVMAddCFGSimplificationPass(gallivm->passmgr);

	/* Run the passes over the main function only */
	LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);

	LLVMDisposeBuilder(gallivm->builder);
	LLVMDisposePassManager(gallivm->passmgr);

}
1205
1206void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
1207{
1208	LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
1209	LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
1210}
1211