/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
2
3/*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Rob Clark <robclark@freedesktop.org>
27 */
28
29#include "pipe/p_state.h"
30#include "util/u_string.h"
31#include "util/u_memory.h"
32#include "util/u_inlines.h"
33#include "util/u_format.h"
34#include "tgsi/tgsi_dump.h"
35#include "tgsi/tgsi_parse.h"
36
37#include "freedreno_program.h"
38
39#include "fd2_program.h"
40#include "fd2_compiler.h"
41#include "fd2_texture.h"
42#include "fd2_util.h"
43
44static struct fd2_shader_stateobj *
45create_shader(enum shader_t type)
46{
47	struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
48	if (!so)
49		return NULL;
50	so->type = type;
51	return so;
52}
53
54static void
55delete_shader(struct fd2_shader_stateobj *so)
56{
57	ir2_shader_destroy(so->ir);
58	free(so->tokens);
59	free(so->bin);
60	free(so);
61}
62
63static struct fd2_shader_stateobj *
64assemble(struct fd2_shader_stateobj *so)
65{
66	free(so->bin);
67	so->bin = ir2_shader_assemble(so->ir, &so->info);
68	if (!so->bin)
69		goto fail;
70
71	if (fd_mesa_debug & FD_DBG_DISASM) {
72		DBG("disassemble: type=%d", so->type);
73		disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type);
74	}
75
76	return so;
77
78fail:
79	debug_error("assemble failed!");
80	delete_shader(so);
81	return NULL;
82}
83
84static struct fd2_shader_stateobj *
85compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so)
86{
87	int ret;
88
89	if (fd_mesa_debug & FD_DBG_DISASM) {
90		DBG("dump tgsi: type=%d", so->type);
91		tgsi_dump(so->tokens, 0);
92	}
93
94	ret = fd2_compile_shader(prog, so);
95	if (ret)
96		goto fail;
97
98	/* NOTE: we don't assemble yet because for VS we don't know the
99	 * type information for vertex fetch yet.. so those need to be
100	 * patched up later before assembling.
101	 */
102
103	so->info.sizedwords = 0;
104
105	return so;
106
107fail:
108	debug_error("compile failed!");
109	delete_shader(so);
110	return NULL;
111}
112
113static void
114emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so)
115{
116	unsigned i;
117
118	if (so->info.sizedwords == 0)
119		assemble(so);
120
121	OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
122	OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
123	OUT_RING(ring, so->info.sizedwords);
124	for (i = 0; i < so->info.sizedwords; i++)
125		OUT_RING(ring, so->bin[i]);
126}
127
128static void *
129fd2_fp_state_create(struct pipe_context *pctx,
130		const struct pipe_shader_state *cso)
131{
132	struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
133	if (!so)
134		return NULL;
135	so->tokens = tgsi_dup_tokens(cso->tokens);
136	return so;
137}
138
/**
 * pipe_context::delete_fs_state hook: 'hwcso' is the state object that
 * fd2_fp_state_create() returned; it is destroyed with delete_shader().
 */
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader(hwcso);
}
145
146static void *
147fd2_vp_state_create(struct pipe_context *pctx,
148		const struct pipe_shader_state *cso)
149{
150	struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
151	if (!so)
152		return NULL;
153	so->tokens = tgsi_dup_tokens(cso->tokens);
154	return so;
155}
156
/**
 * pipe_context::delete_vs_state hook: 'hwcso' is the state object that
 * fd2_vp_state_create() returned; it is destroyed with delete_shader().
 */
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader(hwcso);
}
163
164static void
165patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
166		struct fd_vertex_stateobj *vtx)
167{
168	unsigned i;
169
170	assert(so->num_vfetch_instrs == vtx->num_elements);
171
172	/* update vtx fetch instructions: */
173	for (i = 0; i < so->num_vfetch_instrs; i++) {
174		struct ir2_instruction *instr = so->vfetch_instrs[i];
175		struct pipe_vertex_element *elem = &vtx->pipe[i];
176		struct pipe_vertex_buffer *vb =
177				&ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
178		enum pipe_format format = elem->src_format;
179		const struct util_format_description *desc =
180				util_format_description(format);
181		unsigned j;
182
183		/* Find the first non-VOID channel. */
184		for (j = 0; j < 4; j++)
185			if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
186				break;
187
188		/* CI/CIS can probably be set in compiler instead: */
189		instr->fetch.const_idx = 20 + (i / 3);
190		instr->fetch.const_idx_sel = i % 3;
191
192		instr->fetch.fmt = fd2_pipe2surface(format);
193		instr->fetch.is_normalized = desc->channel[j].normalized;
194		instr->fetch.is_signed =
195				desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
196		instr->fetch.stride = vb->stride ? : 1;
197		instr->fetch.offset = elem->src_offset;
198
199		for (j = 0; j < 4; j++)
200			instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
201
202		assert(instr->fetch.fmt != ~0);
203
204		DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
205				"stride=%d, offset=%d",
206				i, util_format_name(format),
207				instr->fetch.fmt,
208				instr->fetch.const_idx,
209				instr->fetch.const_idx_sel,
210				elem->instance_divisor,
211				instr->regs[0]->swizzle,
212				instr->fetch.stride,
213				instr->fetch.offset);
214	}
215
216	/* trigger re-assemble: */
217	so->info.sizedwords = 0;
218}
219
220static void
221patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
222		struct fd_texture_stateobj *tex)
223{
224	unsigned i;
225
226	/* update tex fetch instructions: */
227	for (i = 0; i < so->num_tfetch_instrs; i++) {
228		struct ir2_instruction *instr = so->tfetch_instrs[i].instr;
229		unsigned samp_id = so->tfetch_instrs[i].samp_id;
230		unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
231
232		if (const_idx != instr->fetch.const_idx) {
233			instr->fetch.const_idx = const_idx;
234			/* trigger re-assemble: */
235			so->info.sizedwords = 0;
236		}
237	}
238}
239
240void
241fd2_program_validate(struct fd_context *ctx)
242{
243	struct fd_program_stateobj *prog = &ctx->prog;
244
245	/* if vertex or frag shader is dirty, we may need to recompile. Compile
246	 * frag shader first, as that assigns the register slots for exports
247	 * from the vertex shader.  And therefore if frag shader has changed we
248	 * need to recompile both vert and frag shader.
249	 */
250	if (ctx->dirty & FD_SHADER_DIRTY_FP)
251		compile(prog, prog->fp);
252
253	if (ctx->dirty & (FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP))
254		compile(prog, prog->vp);
255
256	/* if necessary, fix up vertex fetch instructions: */
257	if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
258		patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx);
259
260	/* if necessary, fix up texture fetch instructions: */
261	if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
262		patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
263		patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
264	}
265}
266
267void
268fd2_program_emit(struct fd_ringbuffer *ring,
269		struct fd_program_stateobj *prog)
270{
271	struct ir2_shader_info *vsi =
272		&((struct fd2_shader_stateobj *)prog->vp)->info;
273	struct ir2_shader_info *fsi =
274		&((struct fd2_shader_stateobj *)prog->fp)->info;
275	uint8_t vs_gprs, fs_gprs, vs_export;
276
277	emit(ring, prog->vp);
278	emit(ring, prog->fp);
279
280	vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg;
281	fs_gprs = (fsi->max_reg < 0) ? 0x80 : fsi->max_reg;
282	vs_export = MAX2(1, prog->num_exports) - 1;
283
284	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
285	OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
286	OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
287			A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
288			A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
289			A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
290			A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
291			A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
292}
293
294/* Creates shader:
295 *    EXEC ADDR(0x2) CNT(0x1)
296 *       (S)FETCH:	SAMPLE	R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
297 *    ALLOC PARAM/PIXEL SIZE(0x0)
298 *    EXEC_END ADDR(0x3) CNT(0x1)
299 *          ALU:	MAXv	export0 = R0, R0	; gl_FragColor
300 *    NOP
301 */
302static struct fd2_shader_stateobj *
303create_blit_fp(void)
304{
305	struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
306	struct ir2_cf *cf;
307	struct ir2_instruction *instr;
308
309	if (!so)
310		return NULL;
311
312	so->ir = ir2_shader_create();
313
314	cf = ir2_cf_create(so->ir, EXEC);
315
316	instr = ir2_instr_create_tex_fetch(cf, 0);
317	ir2_reg_create(instr, 0, "xyzw", 0);
318	ir2_reg_create(instr, 0, "xyx", 0);
319	instr->sync = true;
320
321	cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
322	cf = ir2_cf_create(so->ir, EXEC_END);
323
324	instr = ir2_instr_create_alu(cf, MAXv, ~0);
325	ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
326	ir2_reg_create(instr, 0, NULL, 0);
327	ir2_reg_create(instr, 0, NULL, 0);
328
329	return assemble(so);
330}
331
332/* Creates shader:
333*     EXEC ADDR(0x3) CNT(0x2)
334*           FETCH:	VERTEX	R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
335*           FETCH:	VERTEX	R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
336*     ALLOC POSITION SIZE(0x0)
337*     EXEC ADDR(0x5) CNT(0x1)
338*           ALU:	MAXv	export62 = R2, R2	; gl_Position
339*     ALLOC PARAM/PIXEL SIZE(0x0)
340*     EXEC_END ADDR(0x6) CNT(0x1)
341*           ALU:	MAXv	export0 = R1, R1
342*     NOP
343 */
344static struct fd2_shader_stateobj *
345create_blit_vp(void)
346{
347	struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
348	struct ir2_cf *cf;
349	struct ir2_instruction *instr;
350
351	if (!so)
352		return NULL;
353
354	so->ir = ir2_shader_create();
355
356	cf = ir2_cf_create(so->ir, EXEC);
357
358	instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8);
359	instr->fetch.is_normalized = true;
360	ir2_reg_create(instr, 1, "xy01", 0);
361	ir2_reg_create(instr, 0, "x", 0);
362
363	instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
364	instr->fetch.is_normalized = true;
365	ir2_reg_create(instr, 2, "xyz1", 0);
366	ir2_reg_create(instr, 0, "x", 0);
367
368	cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
369	cf = ir2_cf_create(so->ir, EXEC);
370
371	instr = ir2_instr_create_alu(cf, MAXv, ~0);
372	ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
373	ir2_reg_create(instr, 2, NULL, 0);
374	ir2_reg_create(instr, 2, NULL, 0);
375
376	cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
377	cf = ir2_cf_create(so->ir, EXEC_END);
378
379	instr = ir2_instr_create_alu(cf, MAXv, ~0);
380	ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
381	ir2_reg_create(instr, 1, NULL, 0);
382	ir2_reg_create(instr, 1, NULL, 0);
383
384	return assemble(so);
385}
386
387/* Creates shader:
388 *    ALLOC PARAM/PIXEL SIZE(0x0)
389 *    EXEC_END ADDR(0x1) CNT(0x1)
390 *          ALU:	MAXv	export0 = C0, C0	; gl_FragColor
391 */
392static struct fd2_shader_stateobj *
393create_solid_fp(void)
394{
395	struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
396	struct ir2_cf *cf;
397	struct ir2_instruction *instr;
398
399	if (!so)
400		return NULL;
401
402	so->ir = ir2_shader_create();
403
404	cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
405	cf = ir2_cf_create(so->ir, EXEC_END);
406
407	instr = ir2_instr_create_alu(cf, MAXv, ~0);
408	ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
409	ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
410	ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
411
412	return assemble(so);
413}
414
415/* Creates shader:
416 *    EXEC ADDR(0x3) CNT(0x1)
417 *       (S)FETCH:	VERTEX	R1.xyz1 = R0.x FMT_32_32_32_FLOAT
418 *                           UNSIGNED STRIDE(12) CONST(26, 0)
419 *    ALLOC POSITION SIZE(0x0)
420 *    EXEC ADDR(0x4) CNT(0x1)
421 *          ALU:	MAXv	export62 = R1, R1	; gl_Position
422 *    ALLOC PARAM/PIXEL SIZE(0x0)
423 *    EXEC_END ADDR(0x5) CNT(0x0)
424 */
425static struct fd2_shader_stateobj *
426create_solid_vp(void)
427{
428	struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
429	struct ir2_cf *cf;
430	struct ir2_instruction *instr;
431
432	if (!so)
433		return NULL;
434
435	so->ir = ir2_shader_create();
436
437	cf = ir2_cf_create(so->ir, EXEC);
438
439	instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
440	ir2_reg_create(instr, 1, "xyz1", 0);
441	ir2_reg_create(instr, 0, "x", 0);
442
443	cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
444	cf = ir2_cf_create(so->ir, EXEC);
445
446	instr = ir2_instr_create_alu(cf, MAXv, ~0);
447	ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
448	ir2_reg_create(instr, 1, NULL, 0);
449	ir2_reg_create(instr, 1, NULL, 0);
450
451	cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
452	cf = ir2_cf_create(so->ir, EXEC_END);
453
454	return assemble(so);
455}
456
457void
458fd2_prog_init(struct pipe_context *pctx)
459{
460	struct fd_context *ctx = fd_context(pctx);
461
462	pctx->create_fs_state = fd2_fp_state_create;
463	pctx->delete_fs_state = fd2_fp_state_delete;
464
465	pctx->create_vs_state = fd2_vp_state_create;
466	pctx->delete_vs_state = fd2_vp_state_delete;
467
468	fd_prog_init(pctx);
469
470	ctx->solid_prog.fp = create_solid_fp();
471	ctx->solid_prog.vp = create_solid_vp();
472	ctx->blit_prog[0].fp = create_blit_fp();
473	ctx->blit_prog[0].vp = create_blit_vp();
474}
475