nv50_program.c revision 7df7f7bb99441ed8e2fba2840e0459e72691f272
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/util/tgsi_parse.h"
8#include "tgsi/util/tgsi_util.h"
9
10#include "nv50_context.h"
11#include "nv50_state.h"
12
13#define NV50_SU_MAX_TEMP 64
14
/*
 * One scalar register as seen by the translator: a single component of a
 * TGSI temporary/input/output/constant/immediate, or an anonymous scratch
 * register created by alloc_temp().
 */
struct nv50_reg {
	/* Register file this scalar belongs to. */
	enum {
		P_TEMP = 0,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;
	/* TGSI register index; scratch registers from alloc_temp() use -1. */
	int index;

	/* Slot within the file; a P_TEMP that has no slot yet holds -1. */
	int hw;
	/* Not referenced by any code visible in this file. */
	int neg;
};
28
29struct nv50_pc {
30	struct nv50_program *p;
31
32	/* hw resources */
33	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
34
35	/* tgsi resources */
36	struct nv50_reg *temp;
37	int temp_nr;
38	struct nv50_reg *attr;
39	int attr_nr;
40	struct nv50_reg *result;
41	int result_nr;
42	struct nv50_reg *param;
43	int param_nr;
44	struct nv50_reg *immd;
45	float *immd_buf;
46	int immd_nr;
47
48	struct nv50_reg *temp_temp[8];
49	unsigned temp_temp_nr;
50};
51
52static void
53alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
54{
55	int i;
56
57	if (reg->type != P_TEMP)
58		return;
59
60	if (reg->hw >= 0) {
61		/*XXX: do this here too to catch FP temp-as-attr usage..
62		 *     not clean, but works */
63		if (pc->p->cfg.high_temp < (reg->hw + 1))
64			pc->p->cfg.high_temp = reg->hw + 1;
65		return;
66	}
67
68	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
69		if (!(pc->r_temp[i])) {
70			pc->r_temp[i] = reg;
71			reg->hw = i;
72			if (pc->p->cfg.high_temp < (i + 1))
73				pc->p->cfg.high_temp = i + 1;
74			return;
75		}
76	}
77
78	assert(0);
79}
80
81static struct nv50_reg *
82alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
83{
84	struct nv50_reg *r;
85	int i;
86
87	if (dst && dst->type == P_TEMP && dst->hw == -1)
88		return dst;
89
90	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
91		if (!pc->r_temp[i]) {
92			r = CALLOC_STRUCT(nv50_reg);
93			r->type = P_TEMP;
94			r->index = -1;
95			r->hw = i;
96			pc->r_temp[i] = r;
97			return r;
98		}
99	}
100
101	assert(0);
102	return NULL;
103}
104
105static void
106free_temp(struct nv50_pc *pc, struct nv50_reg *r)
107{
108	if (r->index == -1) {
109		FREE(pc->r_temp[r->hw]);
110		pc->r_temp[r->hw] = NULL;
111	}
112}
113
114static struct nv50_reg *
115temp_temp(struct nv50_pc *pc)
116{
117	if (pc->temp_temp_nr >= 8)
118		assert(0);
119
120	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
121	return pc->temp_temp[pc->temp_temp_nr++];
122}
123
124static void
125kill_temp_temp(struct nv50_pc *pc)
126{
127	int i;
128
129	for (i = 0; i < pc->temp_temp_nr; i++)
130		free_temp(pc, pc->temp_temp[i]);
131	pc->temp_temp_nr = 0;
132}
133
134static struct nv50_reg *
135tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
136{
137	switch (dst->DstRegister.File) {
138	case TGSI_FILE_TEMPORARY:
139		return &pc->temp[dst->DstRegister.Index * 4 + c];
140	case TGSI_FILE_OUTPUT:
141		return &pc->result[dst->DstRegister.Index * 4 + c];
142	case TGSI_FILE_NULL:
143		return NULL;
144	default:
145		break;
146	}
147
148	return NULL;
149}
150
151static struct nv50_reg *
152tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src)
153{
154	/* Handle swizzling */
155	switch (c) {
156	case 0: c = src->SrcRegister.SwizzleX; break;
157	case 1: c = src->SrcRegister.SwizzleY; break;
158	case 2: c = src->SrcRegister.SwizzleZ; break;
159	case 3: c = src->SrcRegister.SwizzleW; break;
160	default:
161		assert(0);
162	}
163
164	switch (src->SrcRegister.File) {
165	case TGSI_FILE_INPUT:
166		return &pc->attr[src->SrcRegister.Index * 4 + c];
167	case TGSI_FILE_TEMPORARY:
168		return &pc->temp[src->SrcRegister.Index * 4 + c];
169	case TGSI_FILE_CONSTANT:
170		return &pc->param[src->SrcRegister.Index * 4 + c];
171	case TGSI_FILE_IMMEDIATE:
172		return &pc->immd[src->SrcRegister.Index * 4 + c];
173	default:
174		break;
175	}
176
177	return NULL;
178}
179
180static void
181emit(struct nv50_pc *pc, unsigned *inst)
182{
183	struct nv50_program *p = pc->p;
184
185       if (inst[0] & 1) {
186               p->insns_nr += 2;
187               p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
188               memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
189       } else {
190               p->insns_nr += 1;
191               p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
192               memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
193       }
194}
195
196static INLINE void set_long(struct nv50_pc *, unsigned *);
197
198static boolean
199is_long(unsigned *inst)
200{
201	if (inst[0] & 1)
202		return TRUE;
203	return FALSE;
204}
205
206static boolean
207is_immd(unsigned *inst)
208{
209	if (is_long(inst) && (inst[1] & 3) == 3)
210		return TRUE;
211	return FALSE;
212}
213
214static INLINE void
215set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
216{
217	set_long(pc, inst);
218	inst[1] &= ~((0x1f << 7) | (0x3 << 12));
219	inst[1] |= (pred << 7) | (idx << 12);
220}
221
222static INLINE void
223set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
224{
225	set_long(pc, inst);
226	inst[1] &= ~((0x3 << 4) | (1 << 6));
227	inst[1] |= (idx << 4) | (on << 6);
228}
229
230static INLINE void
231set_long(struct nv50_pc *pc, unsigned *inst)
232{
233	if (is_long(inst))
234		return;
235
236	inst[0] |= 1;
237	set_pred(pc, 0xf, 0, inst);
238	set_pred_wr(pc, 0, 0, inst);
239}
240
241static INLINE void
242set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
243{
244	if (dst->type == P_RESULT) {
245		set_long(pc, inst);
246		inst[1] |= 0x00000008;
247	}
248
249	alloc_reg(pc, dst);
250	inst[0] |= (dst->hw << 2);
251}
252
253static INLINE void
254set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
255{
256	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
257
258	set_long(pc, inst);
259	/*XXX: can't be predicated - bits overlap.. catch cases where both
260	 *     are required and avoid them. */
261	set_pred(pc, 0, 0, inst);
262	set_pred_wr(pc, 0, 0, inst);
263
264	inst[1] |= 0x00000002 | 0x00000001;
265	inst[0] |= (val & 0x3f) << 16;
266	inst[1] |= (val >> 6) << 2;
267}
268
269static void
270emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
271	    struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
272{
273	unsigned inst[2] = { 0, 0 };
274
275	inst[0] |= 0x80000000;
276	set_dst(pc, dst, inst);
277	alloc_reg(pc, iv);
278	inst[0] |= (iv->hw << 9);
279	alloc_reg(pc, src);
280	inst[0] |= (src->hw << 16);
281	if (noperspective)
282		inst[0] |= (1 << 25);
283
284	emit(pc, inst);
285}
286
287static void
288set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
289{
290	set_long(pc, inst);
291	if (src->type == P_IMMD) {
292		inst[1] |= (NV50_CB_PMISC << 22);
293	} else {
294		if (pc->p->type == NV50_PROG_VERTEX)
295			inst[1] |= (NV50_CB_PVP << 22);
296		else
297			inst[1] |= (NV50_CB_PFP << 22);
298	}
299}
300
301static void
302emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
303{
304	unsigned inst[2] = { 0, 0 };
305
306	inst[0] |= 0x10000000;
307
308	set_dst(pc, dst, inst);
309
310	if (dst->type != P_RESULT && src->type == P_IMMD) {
311		set_immd(pc, src, inst);
312		/*XXX: 32-bit, but steals part of "half" reg space - need to
313		 *     catch and handle this case if/when we do half-regs
314		 */
315		inst[0] |= 0x00008000;
316	} else
317	if (src->type == P_IMMD || src->type == P_CONST) {
318		set_long(pc, inst);
319		set_cseg(pc, src, inst);
320		inst[0] |= (src->hw << 9);
321		inst[1] |= 0x20000000; /* src0 const? */
322	} else {
323		if (src->type == P_ATTR) {
324			set_long(pc, inst);
325			inst[1] |= 0x00200000;
326		}
327
328		alloc_reg(pc, src);
329		inst[0] |= (src->hw << 9);
330	}
331
332	/* We really should support "half" instructions here at some point,
333	 * but I don't feel confident enough about them yet.
334	 */
335	set_long(pc, inst);
336	if (is_long(inst) && !is_immd(inst)) {
337		inst[1] |= 0x04000000; /* 32-bit */
338		inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
339	}
340
341	emit(pc, inst);
342}
343
344static boolean
345check_swap_src_0_1(struct nv50_pc *pc,
346		   struct nv50_reg **s0, struct nv50_reg **s1)
347{
348	struct nv50_reg *src0 = *s0, *src1 = *s1;
349
350	if (src0->type == P_CONST) {
351		if (src1->type != P_CONST) {
352			*s0 = src1;
353			*s1 = src0;
354			return TRUE;
355		}
356	} else
357	if (src1->type == P_ATTR) {
358		if (src0->type != P_ATTR) {
359			*s0 = src1;
360			*s1 = src0;
361			return TRUE;
362		}
363	}
364
365	return FALSE;
366}
367
368static void
369set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
370{
371	if (src->type == P_ATTR) {
372		set_long(pc, inst);
373		inst[1] |= 0x00200000;
374	} else
375	if (src->type == P_CONST || src->type == P_IMMD) {
376		struct nv50_reg *temp = temp_temp(pc);
377
378		emit_mov(pc, temp, src);
379		src = temp;
380	}
381
382	alloc_reg(pc, src);
383	inst[0] |= (src->hw << 9);
384}
385
386static void
387set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
388{
389	if (src->type == P_ATTR) {
390		struct nv50_reg *temp = temp_temp(pc);
391
392		emit_mov(pc, temp, src);
393		src = temp;
394	} else
395	if (src->type == P_CONST || src->type == P_IMMD) {
396		set_cseg(pc, src, inst);
397		inst[0] |= 0x00800000;
398	}
399
400	alloc_reg(pc, src);
401	inst[0] |= (src->hw << 16);
402}
403
404static void
405set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
406{
407	set_long(pc, inst);
408
409	if (src->type == P_ATTR) {
410		struct nv50_reg *temp = temp_temp(pc);
411
412		emit_mov(pc, temp, src);
413		src = temp;
414	} else
415	if (src->type == P_CONST || src->type == P_IMMD) {
416		set_cseg(pc, src, inst);
417		inst[0] |= 0x01000000;
418	}
419
420	alloc_reg(pc, src);
421	inst[1] |= (src->hw << 14);
422}
423
/* Emit dst = src0 * src1 (opcode bits 0xc0000000). */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned op[2] = { 0xc0000000, 0 };

	/* Operand order does not matter; pick the encodable one. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, op);
	set_src_0(pc, src0, op);
	set_src_1(pc, src1, op);

	emit(pc, op);
}
439
/*
 * Emit dst = src0 + src1 (opcode bits 0xb0000000).  The second operand is
 * encoded in the src1 field of a short instruction, but in the src2 field
 * once the instruction has become long.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned op[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, op);
	set_src_0(pc, src0, op);

	if (!is_long(op))
		set_src_1(pc, src1, op);
	else
		set_src_2(pc, src1, op);

	emit(pc, op);
}
458
/*
 * Emit a min/max style instruction: long form of opcode 0xb0000000 with
 * the sub-operation placed at bit 29 of word 1.  The translator passes
 * sub = 4 for MAX and sub = 5 for MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned op[2] = { 0xb0000000, 0 };

	set_long(pc, op);
	op[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, op);
	set_src_0(pc, src0, op);
	set_src_1(pc, src1, op);

	emit(pc, op);
}
476
477static void
478emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
479	 struct nv50_reg *src1)
480{
481	unsigned inst[2] = { 0, 0 };
482
483	inst[0] |= 0xb0000000;
484
485	set_long(pc, inst);
486	if (check_swap_src_0_1(pc, &src0, &src1))
487		inst[1] |= 0x04000000;
488	else
489		inst[1] |= 0x08000000;
490
491	set_dst(pc, dst, inst);
492	set_src_0(pc, src0, inst);
493	set_src_2(pc, src1, inst);
494
495	emit(pc, inst);
496}
497
/* Emit dst = src0 * src1 + src2 (opcode bits 0xe0000000). */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned op[2] = { 0xe0000000, 0 };

	/* Only the two multiplicands may trade places. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, op);
	set_src_0(pc, src0, op);
	set_src_1(pc, src1, op);
	set_src_2(pc, src2, op);

	emit(pc, op);
}
514
/*
 * Emit a single-operand instruction (opcode bits 0x90000000).  A non-zero
 * sub-operation forces the long form and is stored at bit 29 of word 1;
 * sub == 0 leaves the instruction short.  The translator uses sub 0 for
 * RCP, 2 for RSQ, 3 for LG2, 4 for SIN, 5 for COS and 6 for EX2.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned op[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, op);
		op[1] |= (sub << 29);
	}

	set_dst(pc, dst, op);
	set_src_0(pc, src, op);

	emit(pc, op);
}
532
533static boolean
534nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
535{
536	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
537	struct nv50_reg *dst[4], *src[3][4], *temp;
538	unsigned mask;
539	int i, c;
540
541	NOUVEAU_ERR("insn %p\n", tok);
542
543	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
544
545	for (c = 0; c < 4; c++) {
546		if (mask & (1 << c))
547			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
548		else
549			dst[c] = NULL;
550	}
551
552	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
553		for (c = 0; c < 4; c++)
554			src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
555	}
556
557	switch (inst->Instruction.Opcode) {
558	case TGSI_OPCODE_ADD:
559		for (c = 0; c < 4; c++) {
560			if (!(mask & (1 << c)))
561				continue;
562			emit_add(pc, dst[c], src[0][c], src[1][c]);
563		}
564		break;
565	case TGSI_OPCODE_COS:
566		for (c = 0; c < 4; c++) {
567			if (!(mask & (1 << c)))
568				continue;
569			emit_flop(pc, 5, dst[c], src[0][c]);
570		}
571		break;
572	case TGSI_OPCODE_DP3:
573		temp = alloc_temp(pc, NULL);
574		emit_mul(pc, temp, src[0][0], src[1][0]);
575		emit_mad(pc, temp, src[0][1], src[1][1], temp);
576		emit_mad(pc, temp, src[0][2], src[1][2], temp);
577		for (c = 0; c < 4; c++) {
578			if (!(mask & (1 << c)))
579				continue;
580			emit_mov(pc, dst[c], temp);
581		}
582		free_temp(pc, temp);
583		break;
584	case TGSI_OPCODE_DP4:
585		temp = alloc_temp(pc, NULL);
586		emit_mul(pc, temp, src[0][0], src[1][0]);
587		emit_mad(pc, temp, src[0][1], src[1][1], temp);
588		emit_mad(pc, temp, src[0][2], src[1][2], temp);
589		emit_mad(pc, temp, src[0][3], src[1][3], temp);
590		for (c = 0; c < 4; c++) {
591			if (!(mask & (1 << c)))
592				continue;
593			emit_mov(pc, dst[c], temp);
594		}
595		free_temp(pc, temp);
596		break;
597	case TGSI_OPCODE_EX2:
598		for (c = 0; c < 4; c++) {
599			if (!(mask & (1 << c)))
600				continue;
601			emit_flop(pc, 6, dst[c], src[0][c]);
602		}
603		break;
604	case TGSI_OPCODE_LG2:
605		for (c = 0; c < 4; c++) {
606			if (!(mask & (1 << c)))
607				continue;
608			emit_flop(pc, 3, dst[c], src[0][c]);
609		}
610		break;
611	case TGSI_OPCODE_MAD:
612		for (c = 0; c < 4; c++) {
613			if (!(mask & (1 << c)))
614				continue;
615			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
616		}
617		break;
618	case TGSI_OPCODE_MAX:
619		for (c = 0; c < 4; c++) {
620			if (!(mask & (1 << c)))
621				continue;
622			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
623		}
624		break;
625	case TGSI_OPCODE_MIN:
626		for (c = 0; c < 4; c++) {
627			if (!(mask & (1 << c)))
628				continue;
629			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
630		}
631		break;
632	case TGSI_OPCODE_MOV:
633		for (c = 0; c < 4; c++) {
634			if (!(mask & (1 << c)))
635				continue;
636			emit_mov(pc, dst[c], src[0][c]);
637		}
638		break;
639	case TGSI_OPCODE_MUL:
640		for (c = 0; c < 4; c++) {
641			if (!(mask & (1 << c)))
642				continue;
643			emit_mul(pc, dst[c], src[0][c], src[1][c]);
644		}
645		break;
646	case TGSI_OPCODE_RCP:
647		for (c = 0; c < 4; c++) {
648			if (!(mask & (1 << c)))
649				continue;
650			emit_flop(pc, 0, dst[c], src[0][c]);
651		}
652		break;
653	case TGSI_OPCODE_RSQ:
654		for (c = 0; c < 4; c++) {
655			if (!(mask & (1 << c)))
656				continue;
657			emit_flop(pc, 2, dst[c], src[0][c]);
658		}
659		break;
660	case TGSI_OPCODE_SIN:
661		for (c = 0; c < 4; c++) {
662			if (!(mask & (1 << c)))
663				continue;
664			emit_flop(pc, 4, dst[c], src[0][c]);
665		}
666		break;
667	case TGSI_OPCODE_SUB:
668		for (c = 0; c < 4; c++) {
669			if (!(mask & (1 << c)))
670				continue;
671			emit_sub(pc, dst[c], src[0][c], src[1][c]);
672		}
673		break;
674	case TGSI_OPCODE_END:
675		break;
676	default:
677		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
678		return FALSE;
679	}
680
681	kill_temp_temp(pc);
682	return TRUE;
683}
684
685static boolean
686nv50_program_tx_prep(struct nv50_pc *pc)
687{
688	struct tgsi_parse_context p;
689	boolean ret = FALSE;
690	unsigned i, c;
691
692	tgsi_parse_init(&p, pc->p->pipe.tokens);
693	while (!tgsi_parse_end_of_tokens(&p)) {
694		const union tgsi_full_token *tok = &p.FullToken;
695
696		tgsi_parse_token(&p);
697		switch (tok->Token.Type) {
698		case TGSI_TOKEN_TYPE_IMMEDIATE:
699		{
700			const struct tgsi_full_immediate *imm =
701				&p.FullToken.FullImmediate;
702
703			pc->immd_nr++;
704			pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr *
705							     sizeof(float));
706			pc->immd_buf[4 * (pc->immd_nr - 1) + 0] =
707				imm->u.ImmediateFloat32[0].Float;
708			pc->immd_buf[4 * (pc->immd_nr - 1) + 1] =
709				imm->u.ImmediateFloat32[1].Float;
710			pc->immd_buf[4 * (pc->immd_nr - 1) + 2] =
711				imm->u.ImmediateFloat32[2].Float;
712			pc->immd_buf[4 * (pc->immd_nr - 1) + 3] =
713				imm->u.ImmediateFloat32[3].Float;
714		}
715			break;
716		case TGSI_TOKEN_TYPE_DECLARATION:
717		{
718			const struct tgsi_full_declaration *d;
719			unsigned last;
720
721			d = &p.FullToken.FullDeclaration;
722			last = d->u.DeclarationRange.Last;
723
724			switch (d->Declaration.File) {
725			case TGSI_FILE_TEMPORARY:
726				if (pc->temp_nr < (last + 1))
727					pc->temp_nr = last + 1;
728				break;
729			case TGSI_FILE_OUTPUT:
730				if (pc->result_nr < (last + 1))
731					pc->result_nr = last + 1;
732				break;
733			case TGSI_FILE_INPUT:
734				if (pc->attr_nr < (last + 1))
735					pc->attr_nr = last + 1;
736				break;
737			case TGSI_FILE_CONSTANT:
738				if (pc->param_nr < (last + 1))
739					pc->param_nr = last + 1;
740				break;
741			default:
742				NOUVEAU_ERR("bad decl file %d\n",
743					    d->Declaration.File);
744				goto out_err;
745			}
746		}
747			break;
748		case TGSI_TOKEN_TYPE_INSTRUCTION:
749			break;
750		default:
751			break;
752		}
753	}
754
755	NOUVEAU_ERR("%d temps\n", pc->temp_nr);
756	if (pc->temp_nr) {
757		pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
758		if (!pc->temp)
759			goto out_err;
760
761		for (i = 0; i < pc->temp_nr; i++) {
762			for (c = 0; c < 4; c++) {
763				pc->temp[i*4+c].type = P_TEMP;
764				pc->temp[i*4+c].hw = -1;
765				pc->temp[i*4+c].index = i;
766			}
767		}
768	}
769
770	NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
771	if (pc->attr_nr) {
772		struct nv50_reg *iv = NULL, *tmp = NULL;
773		int aid = 0;
774
775		pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
776		if (!pc->attr)
777			goto out_err;
778
779		if (pc->p->type == NV50_PROG_FRAGMENT) {
780			iv = alloc_temp(pc, NULL);
781			aid++;
782		}
783
784		for (i = 0; i < pc->attr_nr; i++) {
785			struct nv50_reg *a = &pc->attr[i*4];
786
787			for (c = 0; c < 4; c++) {
788				if (pc->p->type == NV50_PROG_FRAGMENT) {
789					struct nv50_reg *at =
790						alloc_temp(pc, NULL);
791					pc->attr[i*4+c].type = at->type;
792					pc->attr[i*4+c].hw = at->hw;
793					pc->attr[i*4+c].index = at->index;
794				} else {
795					pc->p->cfg.vp.attr[aid/32] |=
796						(1 << (aid % 32));
797					pc->attr[i*4+c].type = P_ATTR;
798					pc->attr[i*4+c].hw = aid++;
799					pc->attr[i*4+c].index = i;
800				}
801			}
802
803			if (pc->p->type != NV50_PROG_FRAGMENT)
804				continue;
805
806			emit_interp(pc, iv, iv, iv, FALSE);
807			tmp = alloc_temp(pc, NULL);
808			{
809				unsigned inst[2] = { 0, 0 };
810				inst[0]  = 0x90000000;
811				inst[0] |= (tmp->hw << 2);
812				emit(pc, inst);
813			}
814			emit_interp(pc, &a[0], &a[0], tmp, TRUE);
815			emit_interp(pc, &a[1], &a[1], tmp, TRUE);
816			emit_interp(pc, &a[2], &a[2], tmp, TRUE);
817			emit_interp(pc, &a[3], &a[3], tmp, TRUE);
818			free_temp(pc, tmp);
819		}
820
821		if (iv)
822			free_temp(pc, iv);
823	}
824
825	NOUVEAU_ERR("%d result regs\n", pc->result_nr);
826	if (pc->result_nr) {
827		int rid = 0;
828
829		pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
830		if (!pc->result)
831			goto out_err;
832
833		for (i = 0; i < pc->result_nr; i++) {
834			for (c = 0; c < 4; c++) {
835				if (pc->p->type == NV50_PROG_FRAGMENT)
836					pc->result[i*4+c].type = P_TEMP;
837				else
838					pc->result[i*4+c].type = P_RESULT;
839				pc->result[i*4+c].hw = rid++;
840				pc->result[i*4+c].index = i;
841			}
842		}
843	}
844
845	NOUVEAU_ERR("%d param regs\n", pc->param_nr);
846	if (pc->param_nr) {
847		int rid = 0;
848
849		pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
850		if (!pc->param)
851			goto out_err;
852
853		for (i = 0; i < pc->param_nr; i++) {
854			for (c = 0; c < 4; c++) {
855				pc->param[i*4+c].type = P_CONST;
856				pc->param[i*4+c].hw = rid++;
857				pc->param[i*4+c].index = i;
858			}
859		}
860	}
861
862	if (pc->immd_nr) {
863		int rid = 0;
864
865		pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
866		if (!pc->immd)
867			goto out_err;
868
869		for (i = 0; i < pc->immd_nr; i++) {
870			for (c = 0; c < 4; c++) {
871				pc->immd[i*4+c].type = P_IMMD;
872				pc->immd[i*4+c].hw = rid++;
873				pc->immd[i*4+c].index = i;
874			}
875		}
876	}
877
878	ret = TRUE;
879out_err:
880	tgsi_parse_free(&p);
881	return ret;
882}
883
884static boolean
885nv50_program_tx(struct nv50_program *p)
886{
887	struct tgsi_parse_context parse;
888	struct nv50_pc *pc;
889	boolean ret;
890
891	pc = CALLOC_STRUCT(nv50_pc);
892	if (!pc)
893		return FALSE;
894	pc->p = p;
895	pc->p->cfg.high_temp = 4;
896
897	ret = nv50_program_tx_prep(pc);
898	if (ret == FALSE)
899		goto out_cleanup;
900
901	tgsi_parse_init(&parse, pc->p->pipe.tokens);
902	while (!tgsi_parse_end_of_tokens(&parse)) {
903		const union tgsi_full_token *tok = &parse.FullToken;
904
905		tgsi_parse_token(&parse);
906
907		switch (tok->Token.Type) {
908		case TGSI_TOKEN_TYPE_INSTRUCTION:
909			ret = nv50_program_tx_insn(pc, tok);
910			if (ret == FALSE)
911				goto out_err;
912			break;
913		default:
914			break;
915		}
916	}
917
918	p->immd_nr = pc->immd_nr * 4;
919	p->immd = pc->immd_buf;
920
921out_err:
922	tgsi_parse_free(&parse);
923
924out_cleanup:
925	return ret;
926}
927
928static void
929nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
930{
931	int i;
932
933	if (nv50_program_tx(p) == FALSE)
934		assert(0);
935	/* *not* sufficient, it's fine if last inst is long and
936	 * NOT immd - otherwise it's fucked fucked fucked */
937	p->insns[p->insns_nr - 1] |= 0x00000001;
938
939	if (p->type == NV50_PROG_VERTEX) {
940	for (i = 0; i < p->insns_nr; i++)
941		NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
942	} else {
943	for (i = 0; i < p->insns_nr; i++)
944		NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
945	}
946
947	p->translated = TRUE;
948}
949
950static void
951nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
952{
953	int i;
954
955	for (i = 0; i < p->immd_nr; i++) {
956		BEGIN_RING(tesla, 0x0f00, 2);
957		OUT_RING  ((NV50_CB_PMISC << 16) | (i << 8));
958		OUT_RING  (fui(p->immd[i]));
959	}
960}
961
962static void
963nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
964{
965	struct pipe_winsys *ws = nv50->pipe.winsys;
966	void *map;
967
968	if (!p->buffer)
969		p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
970	map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
971	memcpy(map, p->insns, p->insns_nr * 4);
972	ws->buffer_unmap(ws, p->buffer);
973}
974
975void
976nv50_vertprog_validate(struct nv50_context *nv50)
977{
978	struct nouveau_grobj *tesla = nv50->screen->tesla;
979	struct nv50_program *p = nv50->vertprog;
980	struct nouveau_stateobj *so;
981
982	if (!p->translated) {
983		nv50_program_validate(nv50, p);
984		if (!p->translated)
985			assert(0);
986	}
987
988	nv50_program_validate_data(nv50, p);
989	nv50_program_validate_code(nv50, p);
990
991	so = so_new(11, 2);
992	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
993	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
994		  NOUVEAU_BO_HIGH, 0, 0);
995	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
996		  NOUVEAU_BO_LOW, 0, 0);
997	so_method(so, tesla, 0x1650, 2);
998	so_data  (so, p->cfg.vp.attr[0]);
999	so_data  (so, p->cfg.vp.attr[1]);
1000	so_method(so, tesla, 0x16ac, 2);
1001	so_data  (so, 8);
1002	so_data  (so, p->cfg.high_temp);
1003	so_method(so, tesla, 0x140c, 1);
1004	so_data  (so, 0); /* program start offset */
1005	so_emit(nv50->screen->nvws, so);
1006	so_ref(NULL, &so);
1007}
1008
1009void
1010nv50_fragprog_validate(struct nv50_context *nv50)
1011{
1012	struct pipe_winsys *ws = nv50->pipe.winsys;
1013	struct nouveau_grobj *tesla = nv50->screen->tesla;
1014	struct nv50_program *p = nv50->fragprog;
1015	struct nouveau_stateobj *so;
1016	void *map;
1017
1018	if (!p->translated) {
1019		nv50_program_validate(nv50, p);
1020		if (!p->translated)
1021			assert(0);
1022	}
1023
1024	nv50_program_validate_data(nv50, p);
1025	nv50_program_validate_code(nv50, p);
1026
1027	so = so_new(7, 2);
1028	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1029	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1030		  NOUVEAU_BO_HIGH, 0, 0);
1031	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1032		  NOUVEAU_BO_LOW, 0, 0);
1033	so_method(so, tesla, 0x198c, 1);
1034	so_data  (so, p->cfg.high_temp);
1035	so_method(so, tesla, 0x1414, 1);
1036	so_data  (so, 0); /* program start offset */
1037	so_emit(nv50->screen->nvws, so);
1038	so_ref(NULL, &so);
1039}
1040
1041void
1042nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1043{
1044	struct pipe_winsys *ws = nv50->pipe.winsys;
1045
1046	if (p->insns_nr) {
1047		if (p->insns)
1048			FREE(p->insns);
1049		p->insns_nr = 0;
1050	}
1051
1052	if (p->buffer)
1053		pipe_buffer_reference(ws, &p->buffer, NULL);
1054
1055	p->translated = 0;
1056}
1057
1058