radeonsi_shader.c revision 1279923d72942ee201fcc6ad40d552143f651f03
1
2#include "gallivm/lp_bld_tgsi_action.h"
3#include "gallivm/lp_bld_const.h"
4#include "gallivm/lp_bld_intr.h"
5#include "gallivm/lp_bld_tgsi.h"
6#include "radeon_llvm.h"
7#include "radeon_llvm_emit.h"
8#include "tgsi/tgsi_info.h"
9#include "tgsi/tgsi_parse.h"
10#include "tgsi/tgsi_scan.h"
11#include "tgsi/tgsi_dump.h"
12
13#include "radeonsi_pipe.h"
14#include "radeonsi_shader.h"
15#include "sid.h"
16
17#include <assert.h>
18#include <errno.h>
19#include <stdio.h>
20
21/*
22static ps_remap_inputs(
23	struct tgsi_llvm_context * tl_ctx,
24	unsigned tgsi_index,
25	unsigned tgsi_chan)
26{
27	:
28}
29
30struct si_input
31{
32	struct list_head head;
33	unsigned tgsi_index;
34	unsigned tgsi_chan;
35	unsigned order;
36};
37*/
38
39
40struct si_shader_context
41{
42	struct radeon_llvm_context radeon_bld;
43	struct r600_context *rctx;
44	struct tgsi_parse_context parse;
45	struct tgsi_token * tokens;
46	struct si_pipe_shader *shader;
47	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
48/*	unsigned num_inputs; */
49/*	struct list_head inputs; */
50/*	unsigned * input_mappings *//* From TGSI to SI hw */
51/*	struct tgsi_shader_info info;*/
52};
53
/**
 * Convert a TGSI build context pointer back into the si_shader_context that
 * contains it.
 *
 * This is an unchecked pointer cast: it is only valid for build contexts that
 * are embedded at the very start of a struct si_shader_context.
 */
static struct si_shader_context * si_shader_context(
	struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * owner =
				(struct si_shader_context *)bld_base;

	return owner;
}
59
60
/* Interpolation layout constants.
 * NOTE(review): nothing in this file references these yet; they look like
 * base/offset pairs for the perspective and linear barycentric inputs --
 * confirm against the hardware documentation before relying on them. */
#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4
/* Original (misspelled) name, kept as an alias so existing users still build. */
#define CENTROID_OFSET CENTROID_OFFSET

#define USE_SGPR_MAX_SUFFIX_LEN 5

/* How use_sgpr() should present the value read from a scalar register. */
enum sgpr_type {
	SGPR_I32 = 0,		/* 32-bit integer */
	SGPR_I64 = 1,		/* 64-bit integer */
	SGPR_PTR_V4I32 = 2,	/* 64-bit value cast to a pointer to <4 x i32> */
	SGPR_PTR_V8I32 = 3	/* 64-bit value cast to a pointer to <8 x i32> */
};
76
77static LLVMValueRef use_sgpr(
78	struct gallivm_state * gallivm,
79	enum sgpr_type type,
80	unsigned sgpr)
81{
82	LLVMValueRef sgpr_index;
83	LLVMValueRef sgpr_value;
84	LLVMTypeRef ret_type;
85
86	sgpr_index = lp_build_const_int32(gallivm, sgpr);
87
88	if (type == SGPR_I32) {
89		ret_type = LLVMInt32TypeInContext(gallivm->context);
90		return lp_build_intrinsic_unary(gallivm->builder,
91						"llvm.SI.use.sgpr.i32",
92						ret_type, sgpr_index);
93	}
94
95	ret_type = LLVMInt64TypeInContext(gallivm->context);
96	sgpr_value = lp_build_intrinsic_unary(gallivm->builder,
97				"llvm.SI.use.sgpr.i64",
98				 ret_type, sgpr_index);
99
100	switch (type) {
101	case SGPR_I64:
102		return sgpr_value;
103	case SGPR_PTR_V4I32:
104		ret_type = LLVMInt32TypeInContext(gallivm->context);
105		ret_type = LLVMVectorType(ret_type, 4);
106		ret_type = LLVMPointerType(ret_type,
107					0 /*XXX: Specify address space*/);
108		return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
109								ret_type, "");
110	case SGPR_PTR_V8I32:
111		ret_type = LLVMInt32TypeInContext(gallivm->context);
112		ret_type = LLVMVectorType(ret_type, 8);
113		ret_type = LLVMPointerType(ret_type,
114					0 /*XXX: Specify address space*/);
115		return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
116								ret_type, "");
117	default:
118		assert(!"Unsupported SGPR type in use_sgpr()");
119		return NULL;
120	}
121}
122
123static void declare_input_vs(
124	struct si_shader_context * si_shader_ctx,
125	unsigned input_index,
126	const struct tgsi_full_declaration *decl)
127{
128	LLVMValueRef t_list_ptr;
129	LLVMValueRef t_offset;
130	LLVMValueRef attribute_offset;
131	LLVMValueRef buffer_index_reg;
132	LLVMValueRef args[4];
133	LLVMTypeRef vec4_type;
134	LLVMValueRef input;
135	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
136	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
137	struct r600_context *rctx = si_shader_ctx->rctx;
138	struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
139	unsigned chan;
140
141	/* XXX: Communicate with the rest of the driver about which SGPR the T#
142	 * list pointer is going to be stored in.  Hard code to SGPR[6:7] for
143 	 * now */
144	t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 3);
145
146	t_offset = lp_build_const_int32(base->gallivm,
147					4 * velem->vertex_buffer_index);
148	attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset);
149
150	/* Load the buffer index is always, which is always stored in VGPR0
151	 * for Vertex Shaders */
152	buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
153		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);
154
155	vec4_type = LLVMVectorType(base->elem_type, 4);
156	args[0] = t_list_ptr;
157	args[1] = t_offset;
158	args[2] = attribute_offset;
159	args[3] = buffer_index_reg;
160	input = lp_build_intrinsic(base->gallivm->builder,
161		"llvm.SI.vs.load.input", vec4_type, args, 4);
162
163	/* Break up the vec4 into individual components */
164	for (chan = 0; chan < 4; chan++) {
165		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
166		/* XXX: Use a helper function for this.  There is one in
167 		 * tgsi_llvm.c. */
168		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
169				LLVMBuildExtractElement(base->gallivm->builder,
170				input, llvm_chan, "");
171	}
172}
173
174static void declare_input_fs(
175	struct si_shader_context * si_shader_ctx,
176	unsigned input_index,
177	const struct tgsi_full_declaration *decl)
178{
179	const char * intr_name;
180	unsigned chan;
181	struct lp_build_context * base =
182				&si_shader_ctx->radeon_bld.soa.bld_base.base;
183	struct gallivm_state * gallivm = base->gallivm;
184
185	/* This value is:
186	 * [15:0] NewPrimMask (Bit mask for each quad.  It is set it the
187	 *                     quad begins a new primitive.  Bit 0 always needs
188	 *                     to be unset)
189	 * [32:16] ParamOffset
190	 *
191	 */
192	/* XXX: This register number must be identical to the S_00B02C_USER_SGPR
193	 * register field value
194	 */
195	LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6);
196
197
198	/* XXX: Is this the input_index? */
199	LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);
200
201	/* XXX: Handle all possible interpolation modes */
202	switch (decl->Interp.Interpolate) {
203	case TGSI_INTERPOLATE_COLOR:
204		if (si_shader_ctx->rctx->rasterizer->flatshade)
205			intr_name = "llvm.SI.fs.interp.constant";
206		else
207			intr_name = "llvm.SI.fs.interp.linear.center";
208		break;
209	case TGSI_INTERPOLATE_CONSTANT:
210		intr_name = "llvm.SI.fs.interp.constant";
211		break;
212	case TGSI_INTERPOLATE_LINEAR:
213		intr_name = "llvm.SI.fs.interp.linear.center";
214		break;
215	default:
216		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
217		return;
218	}
219
220	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
221	for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
222		LLVMValueRef args[3];
223		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
224		unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
225		LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
226		args[0] = llvm_chan;
227		args[1] = attr_number;
228		args[2] = params;
229		si_shader_ctx->radeon_bld.inputs[soa_index] =
230			lp_build_intrinsic(gallivm->builder, intr_name,
231						input_type, args, 3);
232	}
233}
234
235static void declare_input(
236	struct radeon_llvm_context * radeon_bld,
237	unsigned input_index,
238	const struct tgsi_full_declaration *decl)
239{
240	struct si_shader_context * si_shader_ctx =
241				si_shader_context(&radeon_bld->soa.bld_base);
242	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
243		declare_input_vs(si_shader_ctx, input_index, decl);
244	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
245		declare_input_fs(si_shader_ctx, input_index, decl);
246	} else {
247		fprintf(stderr, "Warning: Unsupported shader type,\n");
248	}
249}
250
251static LLVMValueRef fetch_constant(
252	struct lp_build_tgsi_context * bld_base,
253	const struct tgsi_full_src_register *reg,
254	enum tgsi_opcode_type type,
255	unsigned swizzle)
256{
257	struct lp_build_context * base = &bld_base->base;
258
259	LLVMValueRef const_ptr;
260	LLVMValueRef offset;
261
262	/* XXX: Assume the pointer to the constant buffer is being stored in
263	 * SGPR[0:1] */
264	const_ptr = use_sgpr(base->gallivm, SGPR_I64, 0);
265
266	/* XXX: This assumes that the constant buffer is not packed, so
267	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
268	 * offset of 4. */
269	offset = lp_build_const_int32(base->gallivm,
270					(reg->Register.Index * 4) + swizzle);
271
272	return lp_build_intrinsic_binary(base->gallivm->builder,
273		"llvm.SI.load.const", base->elem_type, const_ptr, offset);
274}
275
276
277/* Declare some intrinsics with the correct attributes */
278static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
279{
280	LLVMValueRef function;
281	struct gallivm_state * gallivm = bld_base->base.gallivm;
282
283	LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
284	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
285
286	/* declare i32 @llvm.SI.use.sgpr.i32(i32) */
287	function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32",
288					i32, &i32, 1);
289	LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
290
291	/* declare i64 @llvm.SI.use.sgpr.i64(i32) */
292	function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64",
293					i64, &i32, 1);
294	LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
295}
296
297/* XXX: This is partially implemented for VS only at this point.  It is not complete */
298static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
299{
300	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
301	struct r600_shader * shader = &si_shader_ctx->shader->shader;
302	struct lp_build_context * base = &bld_base->base;
303	struct lp_build_context * uint =
304				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
305	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
306	LLVMValueRef last_args[9] = { 0 };
307
308	while (!tgsi_parse_end_of_tokens(parse)) {
309		/* XXX: component_bits controls which components of the output
310		 * registers actually get exported. (e.g bit 0 means export
311		 * X component, bit 1 means export Y component, etc.)  I'm
312		 * hard coding this to 0xf for now.  In the future, we might
313		 * want to do something else. */
314		unsigned component_bits = 0xf;
315		unsigned chan;
316		struct tgsi_full_declaration *d =
317					&parse->FullToken.FullDeclaration;
318		LLVMValueRef args[9];
319		unsigned target;
320		unsigned index;
321		unsigned color_count = 0;
322		unsigned param_count = 0;
323		int i;
324
325		tgsi_parse_token(parse);
326		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
327			continue;
328
329		switch (d->Declaration.File) {
330		case TGSI_FILE_INPUT:
331			i = shader->ninput++;
332			shader->input[i].name = d->Semantic.Name;
333			shader->input[i].sid = d->Semantic.Index;
334			shader->input[i].interpolate = d->Interp.Interpolate;
335			shader->input[i].centroid = d->Interp.Centroid;
336			break;
337		case TGSI_FILE_OUTPUT:
338			i = shader->noutput++;
339			shader->output[i].name = d->Semantic.Name;
340			shader->output[i].sid = d->Semantic.Index;
341			shader->output[i].interpolate = d->Interp.Interpolate;
342			break;
343		}
344
345		if (d->Declaration.File != TGSI_FILE_OUTPUT)
346			continue;
347
348		for (index = d->Range.First; index <= d->Range.Last; index++) {
349			for (chan = 0; chan < 4; chan++ ) {
350				LLVMValueRef out_ptr =
351					si_shader_ctx->radeon_bld.soa.outputs
352					[index][chan];
353				/* +5 because the first output value will be
354				 * the 6th argument to the intrinsic. */
355				args[chan + 5]= LLVMBuildLoad(
356					base->gallivm->builder,	out_ptr, "");
357			}
358
359			/* XXX: We probably need to keep track of the output
360			 * values, so we know what we are passing to the next
361			 * stage. */
362
363			/* Select the correct target */
364			switch(d->Semantic.Name) {
365			case TGSI_SEMANTIC_POSITION:
366				target = V_008DFC_SQ_EXP_POS;
367				break;
368			case TGSI_SEMANTIC_COLOR:
369				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
370					target = V_008DFC_SQ_EXP_PARAM + param_count;
371					param_count++;
372				} else {
373					target = V_008DFC_SQ_EXP_MRT + color_count;
374					color_count++;
375				}
376				break;
377			case TGSI_SEMANTIC_GENERIC:
378				target = V_008DFC_SQ_EXP_PARAM + param_count;
379				param_count++;
380				break;
381			default:
382				target = 0;
383				fprintf(stderr,
384					"Warning: SI unhandled output type:%d\n",
385					d->Semantic.Name);
386			}
387
388			/* Specify which components to enable */
389			args[0] = lp_build_const_int32(base->gallivm,
390								component_bits);
391
392			/* Specify whether the EXEC mask represents the valid mask */
393			args[1] = lp_build_const_int32(base->gallivm, 0);
394
395			/* Specify whether this is the last export */
396			args[2] = lp_build_const_int32(base->gallivm, 0);
397
398			/* Specify the target we are exporting */
399			args[3] = lp_build_const_int32(base->gallivm, target);
400
401			/* Set COMPR flag to zero to export data as 32-bit */
402			args[4] = uint->zero;
403
404			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
405			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
406			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
407				if (last_args[0]) {
408					lp_build_intrinsic(base->gallivm->builder,
409							   "llvm.SI.export",
410							   LLVMVoidTypeInContext(base->gallivm->context),
411							   last_args, 9);
412				}
413
414				memcpy(last_args, args, sizeof(args));
415			} else {
416				lp_build_intrinsic(base->gallivm->builder,
417						   "llvm.SI.export",
418						   LLVMVoidTypeInContext(base->gallivm->context),
419						   args, 9);
420			}
421
422		}
423	}
424
425	/* Specify whether the EXEC mask represents the valid mask */
426	last_args[1] = lp_build_const_int32(base->gallivm,
427					    si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);
428
429	/* Specify that this is the last export */
430	last_args[2] = lp_build_const_int32(base->gallivm, 1);
431
432	lp_build_intrinsic(base->gallivm->builder,
433			   "llvm.SI.export",
434			   LLVMVoidTypeInContext(base->gallivm->context),
435			   last_args, 9);
436
437/* XXX: Look up what this function does */
438/*		ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
439}
440
441static void tex_fetch_args(
442	struct lp_build_tgsi_context * bld_base,
443	struct lp_build_emit_data * emit_data)
444{
445	/* WriteMask */
446	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm,
447				emit_data->inst->Dst[0].Register.WriteMask);
448
449	/* Coordinates */
450	/* XXX: Not all sample instructions need 4 address arguments. */
451	emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
452							0, LP_CHAN_ALL);
453
454	/* Resource */
455	emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2);
456	emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm,
457						  8 * emit_data->inst->Src[1].Register.Index);
458
459	/* Sampler */
460	emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1);
461	emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm,
462						  4 * emit_data->inst->Src[1].Register.Index);
463
464	/* Dimensions */
465	/* XXX: We might want to pass this information to the shader at some. */
466/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
467					emit_data->inst->Texture.Texture);
468*/
469
470	emit_data->arg_count = 6;
471	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
472	 * the writemask are clear */
473	emit_data->dst_type = LLVMVectorType(
474			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
475			4);
476}
477
478static const struct lp_build_tgsi_action tex_action = {
479	.fetch_args = tex_fetch_args,
480	.emit = lp_build_tgsi_intrinsic,
481	.intr_name = "llvm.SI.sample"
482};
483
484
485int si_pipe_shader_create(
486	struct pipe_context *ctx,
487	struct si_pipe_shader *shader)
488{
489	struct r600_context *rctx = (struct r600_context*)ctx;
490	struct si_shader_context si_shader_ctx;
491	struct tgsi_shader_info shader_info;
492	struct lp_build_tgsi_context * bld_base;
493	LLVMModuleRef mod;
494	unsigned char * inst_bytes;
495	unsigned inst_byte_count;
496	unsigned i;
497
498	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
499	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
500
501	tgsi_scan_shader(shader->tokens, &shader_info);
502	bld_base->info = &shader_info;
503	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
504	bld_base->emit_prologue = si_llvm_emit_prologue;
505	bld_base->emit_epilogue = si_llvm_emit_epilogue;
506
507	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
508
509	si_shader_ctx.radeon_bld.load_input = declare_input;
510	si_shader_ctx.tokens = shader->tokens;
511	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
512	si_shader_ctx.shader = shader;
513	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
514	si_shader_ctx.rctx = rctx;
515
516	shader->shader.nr_cbufs = rctx->nr_cbufs;
517
518	lp_build_tgsi_llvm(bld_base, shader->tokens);
519
520	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
521
522	mod = bld_base->base.gallivm->module;
523	tgsi_dump(shader->tokens, 0);
524	LLVMDumpModule(mod);
525	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", 1 /* dump */);
526	fprintf(stderr, "SI CODE:\n");
527	for (i = 0; i < inst_byte_count; i+=4 ) {
528		fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
529			inst_bytes[i + 2], inst_bytes[i + 1],
530			inst_bytes[i]);
531	}
532
533	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
534	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
535	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
536
537	tgsi_parse_free(&si_shader_ctx.parse);
538
539	/* copy new shader */
540	if (shader->bo == NULL) {
541		uint32_t *ptr;
542
543		shader->bo = (struct r600_resource*)
544			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, inst_byte_count);
545		if (shader->bo == NULL) {
546			return -ENOMEM;
547		}
548		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
549		if (0 /*R600_BIG_ENDIAN*/) {
550			for (i = 0; i < (inst_byte_count-12)/4; ++i) {
551				ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
552			}
553		} else {
554			memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
555		}
556		rctx->ws->buffer_unmap(shader->bo->cs_buf);
557	}
558
559	free(inst_bytes);
560
561	return 0;
562}
563
564void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
565{
566	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
567
568	memset(&shader->shader,0,sizeof(struct r600_shader));
569}
570