1/*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "ir-a2xx.h"
25
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29#include <assert.h>
30
31#include "freedreno_util.h"
32#include "instr-a2xx.h"
33
/* Debug trace hook: the `if (0)` keeps the DBG() call compiled, so the
 * format string and arguments are still seen by the compiler, but the
 * call is never executed.
 */
#define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
/* Warning / error reporting: forward to DBG() with a severity prefix
 * pasted in front of the format string, so `f` must be a string literal.
 */
#define WARN_MSG(f, ...)   DBG("WARN:  "f, ##__VA_ARGS__)
#define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)

/* Largest register number accepted by ir2_reg_create() */
#define REG_MASK 0x3f
39
/* Encode one control-flow instruction into *instr; 0 on success, -1 on
 * failure.
 */
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);

/* Encode one FETCH or ALU instruction into dwords; 0 on success, -1 on
 * failure.
 */
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
		uint32_t idx, struct ir2_shader_info *info);

/* Fold one register use into the shader-wide statistics in info */
static void reg_update_stats(struct ir2_register *reg,
		struct ir2_shader_info *info, bool dest);
/* Convert a register's swizzle string into the encoding used by the
 * corresponding instruction field:
 */
static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
51
52/* simple allocator to carve allocations out of an up-front allocated heap,
53 * so that we can free everything easily in one shot.
54 */
55static void * ir2_alloc(struct ir2_shader *shader, int sz)
56{
57	void *ptr = &shader->heap[shader->heap_idx];
58	shader->heap_idx += align(sz, 4);
59	return ptr;
60}
61
/* Duplicate str into memory obtained from ir2_alloc().  A NULL input
 * yields NULL; otherwise the returned copy is NUL-terminated.
 */
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
	char *copy;
	int n;

	if (!str)
		return NULL;

	n = strlen(str);
	copy = ir2_alloc(shader, n + 1);
	memcpy(copy, str, n);
	copy[n] = '\0';

	return copy;
}
73
74struct ir2_shader * ir2_shader_create(void)
75{
76	DEBUG_MSG("");
77	return calloc(1, sizeof(struct ir2_shader));
78}
79
/* Release a shader obtained from ir2_shader_create(). */
void ir2_shader_destroy(struct ir2_shader *shader)
{
	DEBUG_MSG("");

	/* free(NULL) is a no-op, so a NULL shader is harmless here */
	free(shader);
}
85
86/* resolve addr/cnt/sequence fields in the individual CF's */
87static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
88{
89	uint32_t addr;
90	unsigned i;
91	int j;
92
93	addr = shader->cfs_count / 2;
94	for (i = 0; i < shader->cfs_count; i++) {
95		struct ir2_cf *cf = shader->cfs[i];
96		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
97			uint32_t sequence = 0;
98
99			if (cf->exec.addr && (cf->exec.addr != addr))
100				WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
101			if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
102				WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
103
104			for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
105				struct ir2_instruction *instr = cf->exec.instrs[j];
106				sequence <<= 2;
107				if (instr->instr_type == IR2_FETCH)
108					sequence |= 0x1;
109				if (instr->sync)
110					sequence |= 0x2;
111			}
112
113			cf->exec.addr = addr;
114			cf->exec.cnt  = cf->exec.instrs_count;
115			cf->exec.sequence = sequence;
116
117			addr += cf->exec.instrs_count;
118		}
119	}
120
121	info->sizedwords = 3 * addr;
122
123	return 0;
124}
125
126void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
127{
128	uint32_t i, j;
129	uint32_t *ptr, *dwords = NULL;
130	uint32_t idx = 0;
131	int ret;
132
133	info->sizedwords    = 0;
134	info->max_reg       = -1;
135	info->max_input_reg = 0;
136	info->regs_written  = 0;
137
138	/* we need an even # of CF's.. insert a NOP if needed */
139	if (shader->cfs_count != align(shader->cfs_count, 2))
140		ir2_cf_create(shader, NOP);
141
142	/* first pass, resolve sizes and addresses: */
143	ret = shader_resolve(shader, info);
144	if (ret) {
145		ERROR_MSG("resolve failed: %d", ret);
146		goto fail;
147	}
148
149	ptr = dwords = calloc(4, info->sizedwords);
150
151	/* second pass, emit CF program in pairs: */
152	for (i = 0; i < shader->cfs_count; i += 2) {
153		instr_cf_t *cfs = (instr_cf_t *)ptr;
154		ret = cf_emit(shader->cfs[i], &cfs[0]);
155		if (ret) {
156			ERROR_MSG("CF emit failed: %d\n", ret);
157			goto fail;
158		}
159		ret = cf_emit(shader->cfs[i+1], &cfs[1]);
160		if (ret) {
161			ERROR_MSG("CF emit failed: %d\n", ret);
162			goto fail;
163		}
164		ptr += 3;
165		assert((ptr - dwords) <= info->sizedwords);
166	}
167
168	/* third pass, emit ALU/FETCH: */
169	for (i = 0; i < shader->cfs_count; i++) {
170		struct ir2_cf *cf = shader->cfs[i];
171		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
172			for (j = 0; j < cf->exec.instrs_count; j++) {
173				ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
174				if (ret) {
175					ERROR_MSG("instruction emit failed: %d", ret);
176					goto fail;
177				}
178				ptr += 3;
179				assert((ptr - dwords) <= info->sizedwords);
180			}
181		}
182	}
183
184	return dwords;
185
186fail:
187	free(dwords);
188	return NULL;
189}
190
191
192struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
193{
194	struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf));
195	DEBUG_MSG("%d", cf_type);
196	cf->shader = shader;
197	cf->cf_type = cf_type;
198	assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
199	shader->cfs[shader->cfs_count++] = cf;
200	return cf;
201}
202
203
204/*
205 * CF instructions:
206 */
207
208static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
209{
210	memset(instr, 0, sizeof(*instr));
211
212	instr->opc = cf->cf_type;
213
214	switch (cf->cf_type) {
215	case NOP:
216		break;
217	case EXEC:
218	case EXEC_END:
219		assert(cf->exec.addr <= 0x1ff);
220		assert(cf->exec.cnt <= 0x6);
221		assert(cf->exec.sequence <= 0xfff);
222		instr->exec.address = cf->exec.addr;
223		instr->exec.count = cf->exec.cnt;
224		instr->exec.serialize = cf->exec.sequence;
225		break;
226	case ALLOC:
227		assert(cf->alloc.size <= 0xf);
228		instr->alloc.size = cf->alloc.size;
229		switch (cf->alloc.type) {
230		case SQ_POSITION:
231		case SQ_PARAMETER_PIXEL:
232			instr->alloc.buffer_select = cf->alloc.type;
233			break;
234		default:
235			ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
236			return -1;
237		}
238		break;
239	case COND_EXEC:
240	case COND_EXEC_END:
241	case COND_PRED_EXEC:
242	case COND_PRED_EXEC_END:
243	case LOOP_START:
244	case LOOP_END:
245	case COND_CALL:
246	case RETURN:
247	case COND_JMP:
248	case COND_EXEC_PRED_CLEAN:
249	case COND_EXEC_PRED_CLEAN_END:
250	case MARK_VS_FETCH_DONE:
251		ERROR_MSG("TODO");
252		return -1;
253	}
254
255	return 0;
256}
257
258
259struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
260{
261	struct ir2_instruction *instr =
262			ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
263	DEBUG_MSG("%d", instr_type);
264	instr->shader = cf->shader;
265	instr->pred = cf->shader->pred;
266	instr->instr_type = instr_type;
267	assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
268	cf->exec.instrs[cf->exec.instrs_count++] = instr;
269	return instr;
270}
271
272
273/*
274 * FETCH instructions:
275 */
276
277static int instr_emit_fetch(struct ir2_instruction *instr,
278		uint32_t *dwords, uint32_t idx,
279		struct ir2_shader_info *info)
280{
281	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
282	int reg = 0;
283	struct ir2_register *dst_reg = instr->regs[reg++];
284	struct ir2_register *src_reg = instr->regs[reg++];
285
286	memset(fetch, 0, sizeof(*fetch));
287
288	reg_update_stats(dst_reg, info, true);
289	reg_update_stats(src_reg, info, false);
290
291	fetch->opc = instr->fetch.opc;
292
293	if (instr->fetch.opc == VTX_FETCH) {
294		instr_fetch_vtx_t *vtx = &fetch->vtx;
295
296		assert(instr->fetch.stride <= 0xff);
297		assert(instr->fetch.fmt <= 0x3f);
298		assert(instr->fetch.const_idx <= 0x1f);
299		assert(instr->fetch.const_idx_sel <= 0x3);
300
301		vtx->src_reg = src_reg->num;
302		vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
303		vtx->dst_reg = dst_reg->num;
304		vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
305		vtx->must_be_one = 1;
306		vtx->const_index = instr->fetch.const_idx;
307		vtx->const_index_sel = instr->fetch.const_idx_sel;
308		vtx->format_comp_all = !!instr->fetch.is_signed;
309		vtx->num_format_all = !instr->fetch.is_normalized;
310		vtx->format = instr->fetch.fmt;
311		vtx->stride = instr->fetch.stride;
312		vtx->offset = instr->fetch.offset;
313
314		if (instr->pred != IR2_PRED_NONE) {
315			vtx->pred_select = 1;
316			vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
317		}
318
319		/* XXX seems like every FETCH but the first has
320		 * this bit set:
321		 */
322		vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
323		vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
324	} else if (instr->fetch.opc == TEX_FETCH) {
325		instr_fetch_tex_t *tex = &fetch->tex;
326
327		assert(instr->fetch.const_idx <= 0x1f);
328
329		tex->src_reg = src_reg->num;
330		tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
331		tex->dst_reg = dst_reg->num;
332		tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
333		tex->const_idx = instr->fetch.const_idx;
334		tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
335		tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
336		tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
337		tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
338		tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
339		tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
340		tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
341		tex->use_comp_lod = 1;
342		tex->use_reg_lod = !instr->fetch.is_cube;
343		tex->sample_location = SAMPLE_CENTER;
344
345		if (instr->pred != IR2_PRED_NONE) {
346			tex->pred_select = 1;
347			tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
348		}
349
350	} else {
351		ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
352		return -1;
353	}
354
355	return 0;
356}
357
358/*
359 * ALU instructions:
360 */
361
362static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
363		struct ir2_shader_info *info)
364{
365	int reg = 0;
366	instr_alu_t *alu = (instr_alu_t *)dwords;
367	struct ir2_register *dst_reg  = instr->regs[reg++];
368	struct ir2_register *src1_reg;
369	struct ir2_register *src2_reg;
370	struct ir2_register *src3_reg;
371
372	memset(alu, 0, sizeof(*alu));
373
374	/* handle instructions w/ 3 src operands: */
375	switch (instr->alu.vector_opc) {
376	case MULADDv:
377	case CNDEv:
378	case CNDGTEv:
379	case CNDGTv:
380	case DOT2ADDv:
381		/* note: disassembler lists 3rd src first, ie:
382		 *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
383		 * which is the reason for this strange ordering.
384		 */
385		src3_reg = instr->regs[reg++];
386		break;
387	default:
388		src3_reg = NULL;
389		break;
390	}
391
392	src1_reg = instr->regs[reg++];
393	src2_reg = instr->regs[reg++];
394
395	reg_update_stats(dst_reg, info, true);
396	reg_update_stats(src1_reg, info, false);
397	reg_update_stats(src2_reg, info, false);
398
399	assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
400	assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
401	assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
402	assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
403	assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
404	assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
405
406	if (instr->alu.vector_opc == (instr_vector_opc_t)~0) {
407		alu->vector_opc          = MAXv;
408		alu->vector_write_mask   = 0;
409	} else {
410		alu->vector_opc          = instr->alu.vector_opc;
411		alu->vector_write_mask   = reg_alu_dst_swiz(dst_reg);
412	}
413
414	alu->vector_dest         = dst_reg->num;
415	alu->export_data         = !!(dst_reg->flags & IR2_REG_EXPORT);
416
417	// TODO predicate case/condition.. need to add to parser
418
419	alu->src2_reg            = src2_reg->num;
420	alu->src2_swiz           = reg_alu_src_swiz(src2_reg);
421	alu->src2_reg_negate     = !!(src2_reg->flags & IR2_REG_NEGATE);
422	alu->src2_reg_abs        = !!(src2_reg->flags & IR2_REG_ABS);
423	alu->src2_sel            = !(src2_reg->flags & IR2_REG_CONST);
424
425	alu->src1_reg            = src1_reg->num;
426	alu->src1_swiz           = reg_alu_src_swiz(src1_reg);
427	alu->src1_reg_negate     = !!(src1_reg->flags & IR2_REG_NEGATE);
428	alu->src1_reg_abs        = !!(src1_reg->flags & IR2_REG_ABS);
429	alu->src1_sel            = !(src1_reg->flags & IR2_REG_CONST);
430
431	alu->vector_clamp        = instr->alu.vector_clamp;
432	alu->scalar_clamp        = instr->alu.scalar_clamp;
433
434	if (instr->alu.scalar_opc != (instr_scalar_opc_t)~0) {
435		struct ir2_register *sdst_reg = instr->regs[reg++];
436
437		reg_update_stats(sdst_reg, info, true);
438
439		assert(sdst_reg->flags == dst_reg->flags);
440
441		if (src3_reg) {
442			assert(src3_reg == instr->regs[reg]);
443			reg++;
444		} else {
445			src3_reg = instr->regs[reg++];
446		}
447
448		alu->scalar_dest         = sdst_reg->num;
449		alu->scalar_write_mask   = reg_alu_dst_swiz(sdst_reg);
450		alu->scalar_opc          = instr->alu.scalar_opc;
451	} else {
452		/* not sure if this is required, but adreno compiler seems
453		 * to always set scalar opc to MAXs if it is not used:
454		 */
455		alu->scalar_opc = MAXs;
456	}
457
458	if (src3_reg) {
459		reg_update_stats(src3_reg, info, false);
460
461		alu->src3_reg            = src3_reg->num;
462		alu->src3_swiz           = reg_alu_src_swiz(src3_reg);
463		alu->src3_reg_negate     = !!(src3_reg->flags & IR2_REG_NEGATE);
464		alu->src3_reg_abs        = !!(src3_reg->flags & IR2_REG_ABS);
465		alu->src3_sel            = !(src3_reg->flags & IR2_REG_CONST);
466	} else {
467		/* not sure if this is required, but adreno compiler seems
468		 * to always set register bank for 3rd src if unused:
469		 */
470		alu->src3_sel = 1;
471	}
472
473	if (instr->pred != IR2_PRED_NONE) {
474		alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
475	}
476
477	return 0;
478}
479
480static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
481		uint32_t idx, struct ir2_shader_info *info)
482{
483	switch (instr->instr_type) {
484	case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
485	case IR2_ALU:   return instr_emit_alu(instr, dwords, info);
486	}
487	return -1;
488}
489
490
491struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
492		int num, const char *swizzle, int flags)
493{
494	struct ir2_register *reg =
495			ir2_alloc(instr->shader, sizeof(struct ir2_register));
496	DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
497	assert(num <= REG_MASK);
498	reg->flags = flags;
499	reg->num = num;
500	reg->swizzle = ir2_strdup(instr->shader, swizzle);
501	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
502	instr->regs[instr->regs_count++] = reg;
503	return reg;
504}
505
506static void reg_update_stats(struct ir2_register *reg,
507		struct ir2_shader_info *info, bool dest)
508{
509	if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
510		info->max_reg = MAX2(info->max_reg, reg->num);
511
512		if (dest) {
513			info->regs_written |= (1 << reg->num);
514		} else if (!(info->regs_written & (1 << reg->num))) {
515			/* for registers that haven't been written, they must be an
516			 * input register that the thread scheduler (presumably?)
517			 * needs to know about:
518			 */
519			info->max_input_reg = MAX2(info->max_input_reg, reg->num);
520		}
521	}
522}
523
524static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
525{
526	uint32_t swiz = 0;
527	int i;
528
529	assert(reg->flags == 0);
530	assert(reg->swizzle);
531
532	DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
533
534	for (i = n-1; i >= 0; i--) {
535		swiz <<= 2;
536		switch (reg->swizzle[i]) {
537		default:
538			ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
539		case 'x': swiz |= 0x0; break;
540		case 'y': swiz |= 0x1; break;
541		case 'z': swiz |= 0x2; break;
542		case 'w': swiz |= 0x3; break;
543		}
544	}
545
546	return swiz;
547}
548
549static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
550{
551	uint32_t swiz = 0;
552	int i;
553
554	assert(reg->flags == 0);
555	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
556
557	DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
558
559	if (reg->swizzle) {
560		for (i = 3; i >= 0; i--) {
561			swiz <<= 3;
562			switch (reg->swizzle[i]) {
563			default:
564				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
565			case 'x': swiz |= 0x0; break;
566			case 'y': swiz |= 0x1; break;
567			case 'z': swiz |= 0x2; break;
568			case 'w': swiz |= 0x3; break;
569			case '0': swiz |= 0x4; break;
570			case '1': swiz |= 0x5; break;
571			case '_': swiz |= 0x7; break;
572			}
573		}
574	} else {
575		swiz = 0x688;
576	}
577
578	return swiz;
579}
580
581/* actually, a write-mask */
582static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
583{
584	uint32_t swiz = 0;
585	int i;
586
587	assert((reg->flags & ~IR2_REG_EXPORT) == 0);
588	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
589
590	DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
591
592	if (reg->swizzle) {
593		for (i = 3; i >= 0; i--) {
594			swiz <<= 1;
595			if (reg->swizzle[i] == "xyzw"[i]) {
596				swiz |= 0x1;
597			} else if (reg->swizzle[i] != '_') {
598				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
599				break;
600			}
601		}
602	} else {
603		swiz = 0xf;
604	}
605
606	return swiz;
607}
608
609static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
610{
611	uint32_t swiz = 0;
612	int i;
613
614	assert((reg->flags & IR2_REG_EXPORT) == 0);
615	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
616
617	DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
618
619	if (reg->swizzle) {
620		for (i = 3; i >= 0; i--) {
621			swiz <<= 2;
622			switch (reg->swizzle[i]) {
623			default:
624				ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
625			case 'x': swiz |= (0x0 - i) & 0x3; break;
626			case 'y': swiz |= (0x1 - i) & 0x3; break;
627			case 'z': swiz |= (0x2 - i) & 0x3; break;
628			case 'w': swiz |= (0x3 - i) & 0x3; break;
629			}
630		}
631	} else {
632		swiz = 0x0;
633	}
634
635	return swiz;
636}
637