1/*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "ir3.h"
25
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29#include <assert.h>
30#include <stdbool.h>
31#include <errno.h>
32
33#include "util/ralloc.h"
34
35#include "freedreno_util.h"
36#include "instr-a3xx.h"
37
/* Allocate 'sz' bytes for use by 'shader'.
 *
 * The memory comes from rzalloc_size() with the shader as the ralloc
 * parent, so individual allocations are never freed on their own; they
 * go away when the shader is handed to ralloc_free() in ir3_destroy().
 */
void * ir3_alloc(struct ir3 *shader, int sz)
{
	/* TODO: don't use rzalloc */
	void *mem = rzalloc_size(shader, sz);

	return mem;
}
45
46struct ir3 * ir3_create(struct ir3_compiler *compiler,
47		unsigned nin, unsigned nout)
48{
49	struct ir3 *shader = rzalloc(compiler, struct ir3);
50
51	shader->compiler = compiler;
52	shader->ninputs = nin;
53	shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
54
55	shader->noutputs = nout;
56	shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
57
58	list_inithead(&shader->block_list);
59	list_inithead(&shader->array_list);
60
61	return shader;
62}
63
64void ir3_destroy(struct ir3 *shader)
65{
66	/* TODO convert the dynamic array to ralloc too: */
67	free(shader->indirects);
68	free(shader->predicates);
69	free(shader->baryfs);
70	free(shader->keeps);
71	free(shader->astc_srgb);
72	ralloc_free(shader);
73}
74
/* Assertion for functions that return an int status: when 'cond' is
 * false, trip assert() (which does nothing if NDEBUG is defined) and
 * then make the enclosing function return -1.
 */
#define iassert(cond) \
	do { \
		if (!(cond)) { \
			assert(cond); \
			return -1; \
		} \
	} while (0)
80
81static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
82		uint32_t repeat, uint32_t valid_flags)
83{
84	reg_t val = { .dummy32 = 0 };
85
86	if (reg->flags & ~valid_flags) {
87		debug_printf("INVALID FLAGS: %x vs %x\n",
88				reg->flags, valid_flags);
89	}
90
91	if (!(reg->flags & IR3_REG_R))
92		repeat = 0;
93
94	if (reg->flags & IR3_REG_IMMED) {
95		val.iim_val = reg->iim_val;
96	} else {
97		unsigned components;
98		int16_t max;
99
100		if (reg->flags & IR3_REG_RELATIV) {
101			components = reg->size;
102			val.idummy10 = reg->array.offset;
103			max = (reg->array.offset + repeat + components - 1) >> 2;
104		} else {
105			components = util_last_bit(reg->wrmask);
106			val.comp = reg->num & 0x3;
107			val.num  = reg->num >> 2;
108			max = (reg->num + repeat + components - 1) >> 2;
109		}
110
111		if (reg->flags & IR3_REG_CONST) {
112			info->max_const = MAX2(info->max_const, max);
113		} else if (val.num == 63) {
114			/* ignore writes to dummy register r63.x */
115		} else if ((max != REG_A0) && (max != REG_P0)) {
116			if (reg->flags & IR3_REG_HALF) {
117				info->max_half_reg = MAX2(info->max_half_reg, max);
118			} else {
119				info->max_reg = MAX2(info->max_reg, max);
120			}
121		}
122	}
123
124	return val.dummy32;
125}
126
127static int emit_cat0(struct ir3_instruction *instr, void *ptr,
128		struct ir3_info *info)
129{
130	instr_cat0_t *cat0 = ptr;
131
132	if (info->gpu_id >= 500) {
133		cat0->a5xx.immed = instr->cat0.immed;
134	} else if (info->gpu_id >= 400) {
135		cat0->a4xx.immed = instr->cat0.immed;
136	} else {
137		cat0->a3xx.immed = instr->cat0.immed;
138	}
139	cat0->repeat   = instr->repeat;
140	cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
141	cat0->inv      = instr->cat0.inv;
142	cat0->comp     = instr->cat0.comp;
143	cat0->opc      = instr->opc;
144	cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
145	cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
146	cat0->opc_cat  = 0;
147
148	return 0;
149}
150
151static uint32_t type_flags(type_t type)
152{
153	return (type_size(type) == 32) ? 0 : IR3_REG_HALF;
154}
155
156static int emit_cat1(struct ir3_instruction *instr, void *ptr,
157		struct ir3_info *info)
158{
159	struct ir3_register *dst = instr->regs[0];
160	struct ir3_register *src = instr->regs[1];
161	instr_cat1_t *cat1 = ptr;
162
163	iassert(instr->regs_count == 2);
164	iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF));
165	iassert((src->flags & IR3_REG_IMMED) ||
166			!((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF));
167
168	if (src->flags & IR3_REG_IMMED) {
169		cat1->iim_val = src->iim_val;
170		cat1->src_im  = 1;
171	} else if (src->flags & IR3_REG_RELATIV) {
172		cat1->off       = reg(src, info, instr->repeat,
173				IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV);
174		cat1->src_rel   = 1;
175		cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
176	} else {
177		cat1->src  = reg(src, info, instr->repeat,
178				IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
179		cat1->src_c     = !!(src->flags & IR3_REG_CONST);
180	}
181
182	cat1->dst      = reg(dst, info, instr->repeat,
183			IR3_REG_RELATIV | IR3_REG_EVEN |
184			IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
185	cat1->repeat   = instr->repeat;
186	cat1->src_r    = !!(src->flags & IR3_REG_R);
187	cat1->ss       = !!(instr->flags & IR3_INSTR_SS);
188	cat1->ul       = !!(instr->flags & IR3_INSTR_UL);
189	cat1->dst_type = instr->cat1.dst_type;
190	cat1->dst_rel  = !!(dst->flags & IR3_REG_RELATIV);
191	cat1->src_type = instr->cat1.src_type;
192	cat1->even     = !!(dst->flags & IR3_REG_EVEN);
193	cat1->pos_inf  = !!(dst->flags & IR3_REG_POS_INF);
194	cat1->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
195	cat1->sync     = !!(instr->flags & IR3_INSTR_SY);
196	cat1->opc_cat  = 1;
197
198	return 0;
199}
200
201static int emit_cat2(struct ir3_instruction *instr, void *ptr,
202		struct ir3_info *info)
203{
204	struct ir3_register *dst = instr->regs[0];
205	struct ir3_register *src1 = instr->regs[1];
206	struct ir3_register *src2 = instr->regs[2];
207	instr_cat2_t *cat2 = ptr;
208	unsigned absneg = ir3_cat2_absneg(instr->opc);
209
210	iassert((instr->regs_count == 2) || (instr->regs_count == 3));
211
212	if (src1->flags & IR3_REG_RELATIV) {
213		iassert(src1->array.offset < (1 << 10));
214		cat2->rel1.src1      = reg(src1, info, instr->repeat,
215				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
216				IR3_REG_HALF | absneg);
217		cat2->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
218		cat2->rel1.src1_rel  = 1;
219	} else if (src1->flags & IR3_REG_CONST) {
220		iassert(src1->num < (1 << 12));
221		cat2->c1.src1   = reg(src1, info, instr->repeat,
222				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
223		cat2->c1.src1_c = 1;
224	} else {
225		iassert(src1->num < (1 << 11));
226		cat2->src1 = reg(src1, info, instr->repeat,
227				IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
228				absneg);
229	}
230	cat2->src1_im  = !!(src1->flags & IR3_REG_IMMED);
231	cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
232	cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
233	cat2->src1_r   = !!(src1->flags & IR3_REG_R);
234
235	if (src2) {
236		iassert((src2->flags & IR3_REG_IMMED) ||
237				!((src1->flags ^ src2->flags) & IR3_REG_HALF));
238
239		if (src2->flags & IR3_REG_RELATIV) {
240			iassert(src2->array.offset < (1 << 10));
241			cat2->rel2.src2      = reg(src2, info, instr->repeat,
242					IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
243					IR3_REG_HALF | absneg);
244			cat2->rel2.src2_c    = !!(src2->flags & IR3_REG_CONST);
245			cat2->rel2.src2_rel  = 1;
246		} else if (src2->flags & IR3_REG_CONST) {
247			iassert(src2->num < (1 << 12));
248			cat2->c2.src2   = reg(src2, info, instr->repeat,
249					IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
250			cat2->c2.src2_c = 1;
251		} else {
252			iassert(src2->num < (1 << 11));
253			cat2->src2 = reg(src2, info, instr->repeat,
254					IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
255					absneg);
256		}
257
258		cat2->src2_im  = !!(src2->flags & IR3_REG_IMMED);
259		cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
260		cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
261		cat2->src2_r   = !!(src2->flags & IR3_REG_R);
262	}
263
264	cat2->dst      = reg(dst, info, instr->repeat,
265			IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
266	cat2->repeat   = instr->repeat;
267	cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
268	cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
269	cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
270	cat2->ei       = !!(dst->flags & IR3_REG_EI);
271	cat2->cond     = instr->cat2.condition;
272	cat2->full     = ! (src1->flags & IR3_REG_HALF);
273	cat2->opc      = instr->opc;
274	cat2->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
275	cat2->sync     = !!(instr->flags & IR3_INSTR_SY);
276	cat2->opc_cat  = 2;
277
278	return 0;
279}
280
281static int emit_cat3(struct ir3_instruction *instr, void *ptr,
282		struct ir3_info *info)
283{
284	struct ir3_register *dst = instr->regs[0];
285	struct ir3_register *src1 = instr->regs[1];
286	struct ir3_register *src2 = instr->regs[2];
287	struct ir3_register *src3 = instr->regs[3];
288	unsigned absneg = ir3_cat3_absneg(instr->opc);
289	instr_cat3_t *cat3 = ptr;
290	uint32_t src_flags = 0;
291
292	switch (instr->opc) {
293	case OPC_MAD_F16:
294	case OPC_MAD_U16:
295	case OPC_MAD_S16:
296	case OPC_SEL_B16:
297	case OPC_SEL_S16:
298	case OPC_SEL_F16:
299	case OPC_SAD_S16:
300	case OPC_SAD_S32:  // really??
301		src_flags |= IR3_REG_HALF;
302		break;
303	default:
304		break;
305	}
306
307	iassert(instr->regs_count == 4);
308	iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
309	iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
310	iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
311
312	if (src1->flags & IR3_REG_RELATIV) {
313		iassert(src1->array.offset < (1 << 10));
314		cat3->rel1.src1      = reg(src1, info, instr->repeat,
315				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
316				IR3_REG_HALF | absneg);
317		cat3->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
318		cat3->rel1.src1_rel  = 1;
319	} else if (src1->flags & IR3_REG_CONST) {
320		iassert(src1->num < (1 << 12));
321		cat3->c1.src1   = reg(src1, info, instr->repeat,
322				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
323		cat3->c1.src1_c = 1;
324	} else {
325		iassert(src1->num < (1 << 11));
326		cat3->src1 = reg(src1, info, instr->repeat,
327				IR3_REG_R | IR3_REG_HALF | absneg);
328	}
329
330	cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
331	cat3->src1_r   = !!(src1->flags & IR3_REG_R);
332
333	cat3->src2     = reg(src2, info, instr->repeat,
334			IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
335	cat3->src2_c   = !!(src2->flags & IR3_REG_CONST);
336	cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
337	cat3->src2_r   = !!(src2->flags & IR3_REG_R);
338
339
340	if (src3->flags & IR3_REG_RELATIV) {
341		iassert(src3->array.offset < (1 << 10));
342		cat3->rel2.src3      = reg(src3, info, instr->repeat,
343				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
344				IR3_REG_HALF | absneg);
345		cat3->rel2.src3_c    = !!(src3->flags & IR3_REG_CONST);
346		cat3->rel2.src3_rel  = 1;
347	} else if (src3->flags & IR3_REG_CONST) {
348		iassert(src3->num < (1 << 12));
349		cat3->c2.src3   = reg(src3, info, instr->repeat,
350				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
351		cat3->c2.src3_c = 1;
352	} else {
353		iassert(src3->num < (1 << 11));
354		cat3->src3 = reg(src3, info, instr->repeat,
355				IR3_REG_R | IR3_REG_HALF | absneg);
356	}
357
358	cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
359	cat3->src3_r   = !!(src3->flags & IR3_REG_R);
360
361	cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
362	cat3->repeat   = instr->repeat;
363	cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
364	cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
365	cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
366	cat3->opc      = instr->opc;
367	cat3->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
368	cat3->sync     = !!(instr->flags & IR3_INSTR_SY);
369	cat3->opc_cat  = 3;
370
371	return 0;
372}
373
374static int emit_cat4(struct ir3_instruction *instr, void *ptr,
375		struct ir3_info *info)
376{
377	struct ir3_register *dst = instr->regs[0];
378	struct ir3_register *src = instr->regs[1];
379	instr_cat4_t *cat4 = ptr;
380
381	iassert(instr->regs_count == 2);
382
383	if (src->flags & IR3_REG_RELATIV) {
384		iassert(src->array.offset < (1 << 10));
385		cat4->rel.src      = reg(src, info, instr->repeat,
386				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
387				IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
388		cat4->rel.src_c    = !!(src->flags & IR3_REG_CONST);
389		cat4->rel.src_rel  = 1;
390	} else if (src->flags & IR3_REG_CONST) {
391		iassert(src->num < (1 << 12));
392		cat4->c.src   = reg(src, info, instr->repeat,
393				IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
394				IR3_REG_R | IR3_REG_HALF);
395		cat4->c.src_c = 1;
396	} else {
397		iassert(src->num < (1 << 11));
398		cat4->src = reg(src, info, instr->repeat,
399				IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
400				IR3_REG_R | IR3_REG_HALF);
401	}
402
403	cat4->src_im   = !!(src->flags & IR3_REG_IMMED);
404	cat4->src_neg  = !!(src->flags & IR3_REG_FNEG);
405	cat4->src_abs  = !!(src->flags & IR3_REG_FABS);
406	cat4->src_r    = !!(src->flags & IR3_REG_R);
407
408	cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
409	cat4->repeat   = instr->repeat;
410	cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
411	cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
412	cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
413	cat4->full     = ! (src->flags & IR3_REG_HALF);
414	cat4->opc      = instr->opc;
415	cat4->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
416	cat4->sync     = !!(instr->flags & IR3_INSTR_SY);
417	cat4->opc_cat  = 4;
418
419	return 0;
420}
421
422static int emit_cat5(struct ir3_instruction *instr, void *ptr,
423		struct ir3_info *info)
424{
425	struct ir3_register *dst = instr->regs[0];
426	struct ir3_register *src1 = instr->regs[1];
427	struct ir3_register *src2 = instr->regs[2];
428	struct ir3_register *src3 = instr->regs[3];
429	instr_cat5_t *cat5 = ptr;
430
431	iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
432
433	assume(src1 || !src2);
434	assume(src2 || !src3);
435
436	if (src1) {
437		cat5->full = ! (src1->flags & IR3_REG_HALF);
438		cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
439	}
440
441	if (instr->flags & IR3_INSTR_S2EN) {
442		if (src2) {
443			iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
444			cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
445		}
446		if (src3) {
447			iassert(src3->flags & IR3_REG_HALF);
448			cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
449		}
450		iassert(!(instr->cat5.samp | instr->cat5.tex));
451	} else {
452		iassert(!src3);
453		if (src2) {
454			iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
455			cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
456		}
457		cat5->norm.samp = instr->cat5.samp;
458		cat5->norm.tex  = instr->cat5.tex;
459	}
460
461	cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
462	cat5->wrmask   = dst->wrmask;
463	cat5->type     = instr->cat5.type;
464	cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
465	cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
466	cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
467	cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
468	cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
469	cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
470	cat5->opc      = instr->opc;
471	cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
472	cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
473	cat5->opc_cat  = 5;
474
475	return 0;
476}
477
478static int emit_cat6(struct ir3_instruction *instr, void *ptr,
479		struct ir3_info *info)
480{
481	struct ir3_register *dst, *src1, *src2;
482	instr_cat6_t *cat6 = ptr;
483
484	/* the "dst" for a store instruction is (from the perspective
485	 * of data flow in the shader, ie. register use/def, etc) in
486	 * fact a register that is read by the instruction, rather
487	 * than written:
488	 */
489	if (is_store(instr)) {
490		iassert(instr->regs_count >= 3);
491
492		dst  = instr->regs[1];
493		src1 = instr->regs[2];
494		src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
495	} else {
496		iassert(instr->regs_count >= 2);
497
498		dst  = instr->regs[0];
499		src1 = instr->regs[1];
500		src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
501	}
502
503
504	/* TODO we need a more comprehensive list about which instructions
505	 * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
506	 * indicate to use the src_off encoding even if offset is zero
507	 * (but then what to do about dst_off?)
508	 */
509	if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
510		instr_cat6a_t *cat6a = ptr;
511
512		cat6->src_off = true;
513
514		cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
515		cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
516		if (src2) {
517			cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
518			cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
519		}
520		cat6a->off = instr->cat6.src_offset;
521	} else {
522		instr_cat6b_t *cat6b = ptr;
523
524		cat6->src_off = false;
525
526		cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
527		cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
528		if (src2) {
529			cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
530			cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
531		}
532	}
533
534	if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
535		instr_cat6c_t *cat6c = ptr;
536		cat6->dst_off = true;
537		cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
538		cat6c->off = instr->cat6.dst_offset;
539	} else {
540		instr_cat6d_t *cat6d = ptr;
541		cat6->dst_off = false;
542		cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
543	}
544
545	cat6->type     = instr->cat6.type;
546	cat6->opc      = instr->opc;
547	cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
548	cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
549	cat6->g        = !!(instr->flags & IR3_INSTR_G);
550	cat6->opc_cat  = 6;
551
552	return 0;
553}
554
555static int (*emit[])(struct ir3_instruction *instr, void *ptr,
556		struct ir3_info *info) = {
557	emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
558};
559
560void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
561		uint32_t gpu_id)
562{
563	uint32_t *ptr, *dwords;
564
565	info->gpu_id        = gpu_id;
566	info->max_reg       = -1;
567	info->max_half_reg  = -1;
568	info->max_const     = -1;
569	info->instrs_count  = 0;
570	info->sizedwords    = 0;
571
572	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
573		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
574			info->sizedwords += 2;
575		}
576	}
577
578	/* need an integer number of instruction "groups" (sets of 16
579	 * instructions on a4xx or sets of 4 instructions on a3xx),
580	 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
581	 */
582	if (gpu_id >= 400) {
583		info->sizedwords = align(info->sizedwords, 16 * 2);
584	} else {
585		info->sizedwords = align(info->sizedwords, 4 * 2);
586	}
587
588	ptr = dwords = calloc(4, info->sizedwords);
589
590	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
591		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
592			int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
593			if (ret)
594				goto fail;
595			info->instrs_count += 1 + instr->repeat;
596			dwords += 2;
597		}
598	}
599
600	return ptr;
601
602fail:
603	free(ptr);
604	return NULL;
605}
606
607static struct ir3_register * reg_create(struct ir3 *shader,
608		int num, int flags)
609{
610	struct ir3_register *reg =
611			ir3_alloc(shader, sizeof(struct ir3_register));
612	reg->wrmask = 1;
613	reg->flags = flags;
614	reg->num = num;
615	return reg;
616}
617
618static void insert_instr(struct ir3_block *block,
619		struct ir3_instruction *instr)
620{
621	struct ir3 *shader = block->shader;
622#ifdef DEBUG
623	static uint32_t serialno = 0;
624	instr->serialno = ++serialno;
625#endif
626	list_addtail(&instr->node, &block->instr_list);
627
628	if (is_input(instr))
629		array_insert(shader->baryfs, instr);
630}
631
632struct ir3_block * ir3_block_create(struct ir3 *shader)
633{
634	struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
635#ifdef DEBUG
636	static uint32_t serialno = 0;
637	block->serialno = ++serialno;
638#endif
639	block->shader = shader;
640	list_inithead(&block->node);
641	list_inithead(&block->instr_list);
642	return block;
643}
644
645static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
646{
647	struct ir3_instruction *instr;
648	unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0]));
649	char *ptr = ir3_alloc(block->shader, sz);
650
651	instr = (struct ir3_instruction *)ptr;
652	ptr  += sizeof(*instr);
653	instr->regs = (struct ir3_register **)ptr;
654
655#ifdef DEBUG
656	instr->regs_max = nreg;
657#endif
658
659	return instr;
660}
661
662struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
663		opc_t opc, int nreg)
664{
665	struct ir3_instruction *instr = instr_create(block, nreg);
666	instr->block = block;
667	instr->opc = opc;
668	insert_instr(block, instr);
669	return instr;
670}
671
672struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc)
673{
674	/* NOTE: we could be slightly more clever, at least for non-meta,
675	 * and choose # of regs based on category.
676	 */
677	return ir3_instr_create2(block, opc, 4);
678}
679
680struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
681{
682	struct ir3_instruction *new_instr = instr_create(instr->block,
683			instr->regs_count);
684	struct ir3_register **regs;
685	unsigned i;
686
687	regs = new_instr->regs;
688	*new_instr = *instr;
689	new_instr->regs = regs;
690
691	insert_instr(instr->block, new_instr);
692
693	/* clone registers: */
694	new_instr->regs_count = 0;
695	for (i = 0; i < instr->regs_count; i++) {
696		struct ir3_register *reg = instr->regs[i];
697		struct ir3_register *new_reg =
698				ir3_reg_create(new_instr, reg->num, reg->flags);
699		*new_reg = *reg;
700	}
701
702	return new_instr;
703}
704
705struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
706		int num, int flags)
707{
708	struct ir3 *shader = instr->block->shader;
709	struct ir3_register *reg = reg_create(shader, num, flags);
710#ifdef DEBUG
711	debug_assert(instr->regs_count < instr->regs_max);
712#endif
713	instr->regs[instr->regs_count++] = reg;
714	return reg;
715}
716
717struct ir3_register * ir3_reg_clone(struct ir3 *shader,
718		struct ir3_register *reg)
719{
720	struct ir3_register *new_reg = reg_create(shader, 0, 0);
721	*new_reg = *reg;
722	return new_reg;
723}
724
725void
726ir3_instr_set_address(struct ir3_instruction *instr,
727		struct ir3_instruction *addr)
728{
729	if (instr->address != addr) {
730		struct ir3 *ir = instr->block->shader;
731		instr->address = addr;
732		array_insert(ir->indirects, instr);
733	}
734}
735
736void
737ir3_block_clear_mark(struct ir3_block *block)
738{
739	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
740		instr->flags &= ~IR3_INSTR_MARK;
741}
742
743void
744ir3_clear_mark(struct ir3 *ir)
745{
746	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
747		ir3_block_clear_mark(block);
748	}
749}
750
751/* note: this will destroy instr->depth, don't do it until after sched! */
752unsigned
753ir3_count_instructions(struct ir3 *ir)
754{
755	unsigned cnt = 0;
756	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
757		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
758			instr->ip = cnt++;
759		}
760		block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
761		block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
762	}
763	return cnt;
764}
765
766struct ir3_array *
767ir3_lookup_array(struct ir3 *ir, unsigned id)
768{
769	list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
770		if (arr->id == id)
771			return arr;
772	return NULL;
773}
774