/* nv50_program.c revision fbf4027dd9b279ec159906dcad134f71e34aaec8 */
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/util/tgsi_parse.h"
8#include "tgsi/util/tgsi_util.h"
9
10#include "nv50_context.h"
11#include "nv50_state.h"
12
/* Number of hardware temporary slots tracked in nv50_pc::r_temp. */
#define NV50_SU_MAX_TEMP 64
14
/*
 * A single scalar register component as tracked by the translator.
 */
struct nv50_reg {
	/* Which register file this component lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* Source-level register index; -1 marks scratch registers that
	 * alloc_temp() created itself (free_temp() only releases those). */
	int index;

	/* Assigned hardware slot; -1 while a P_TEMP has not been placed. */
	int hw;

	/* Not referenced by the code in this file. */
	int neg;
};
28
29struct nv50_pc {
30	struct nv50_program *p;
31
32	/* hw resources */
33	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
34
35	/* tgsi resources */
36	struct nv50_reg *temp;
37	int temp_nr;
38	struct nv50_reg *attr;
39	int attr_nr;
40	struct nv50_reg *result;
41	int result_nr;
42	struct nv50_reg *param;
43	int param_nr;
44	struct nv50_reg *immd;
45	float *immd_buf;
46	int immd_nr;
47
48	struct nv50_reg *temp_temp[8];
49	unsigned temp_temp_nr;
50};
51
52static void
53alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
54{
55	int i;
56
57	if (reg->type != P_TEMP)
58		return;
59
60	if (reg->hw >= 0) {
61		/*XXX: do this here too to catch FP temp-as-attr usage..
62		 *     not clean, but works */
63		if (pc->p->cfg.high_temp < (reg->hw + 1))
64			pc->p->cfg.high_temp = reg->hw + 1;
65		return;
66	}
67
68	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
69		if (!(pc->r_temp[i])) {
70			pc->r_temp[i] = reg;
71			reg->hw = i;
72			if (pc->p->cfg.high_temp < (i + 1))
73				pc->p->cfg.high_temp = i + 1;
74			return;
75		}
76	}
77
78	assert(0);
79}
80
81static struct nv50_reg *
82alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
83{
84	struct nv50_reg *r;
85	int i;
86
87	if (dst && dst->type == P_TEMP && dst->hw == -1)
88		return dst;
89
90	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
91		if (!pc->r_temp[i]) {
92			r = CALLOC_STRUCT(nv50_reg);
93			r->type = P_TEMP;
94			r->index = -1;
95			r->hw = i;
96			pc->r_temp[i] = r;
97			return r;
98		}
99	}
100
101	assert(0);
102	return NULL;
103}
104
105static void
106free_temp(struct nv50_pc *pc, struct nv50_reg *r)
107{
108	if (r->index == -1) {
109		FREE(pc->r_temp[r->hw]);
110		pc->r_temp[r->hw] = NULL;
111	}
112}
113
114static struct nv50_reg *
115temp_temp(struct nv50_pc *pc)
116{
117	if (pc->temp_temp_nr >= 8)
118		assert(0);
119
120	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
121	return pc->temp_temp[pc->temp_temp_nr++];
122}
123
124static void
125kill_temp_temp(struct nv50_pc *pc)
126{
127	int i;
128
129	for (i = 0; i < pc->temp_temp_nr; i++)
130		free_temp(pc, pc->temp_temp[i]);
131	pc->temp_temp_nr = 0;
132}
133
134static struct nv50_reg *
135tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
136{
137	switch (dst->DstRegister.File) {
138	case TGSI_FILE_TEMPORARY:
139		return &pc->temp[dst->DstRegister.Index * 4 + c];
140	case TGSI_FILE_OUTPUT:
141		return &pc->result[dst->DstRegister.Index * 4 + c];
142	case TGSI_FILE_NULL:
143		return NULL;
144	default:
145		break;
146	}
147
148	return NULL;
149}
150
151static struct nv50_reg *
152tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src)
153{
154	/* Handle swizzling */
155	switch (c) {
156	case 0: c = src->SrcRegister.SwizzleX; break;
157	case 1: c = src->SrcRegister.SwizzleY; break;
158	case 2: c = src->SrcRegister.SwizzleZ; break;
159	case 3: c = src->SrcRegister.SwizzleW; break;
160	default:
161		assert(0);
162	}
163
164	switch (src->SrcRegister.File) {
165	case TGSI_FILE_INPUT:
166		return &pc->attr[src->SrcRegister.Index * 4 + c];
167	case TGSI_FILE_TEMPORARY:
168		return &pc->temp[src->SrcRegister.Index * 4 + c];
169	case TGSI_FILE_CONSTANT:
170		return &pc->param[src->SrcRegister.Index * 4 + c];
171	case TGSI_FILE_IMMEDIATE:
172		return &pc->immd[src->SrcRegister.Index * 4 + c];
173	default:
174		break;
175	}
176
177	return NULL;
178}
179
180static void
181emit(struct nv50_pc *pc, unsigned *inst)
182{
183	struct nv50_program *p = pc->p;
184
185       if (inst[0] & 1) {
186               p->insns_nr += 2;
187               p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
188               memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
189       } else {
190               p->insns_nr += 1;
191               p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
192               memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
193       }
194}
195
196static INLINE void set_long(struct nv50_pc *, unsigned *);
197
198static boolean
199is_long(unsigned *inst)
200{
201	if (inst[0] & 1)
202		return TRUE;
203	return FALSE;
204}
205
206static boolean
207is_immd(unsigned *inst)
208{
209	if (is_long(inst) && (inst[1] & 3) == 3)
210		return TRUE;
211	return FALSE;
212}
213
214static INLINE void
215set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
216{
217	set_long(pc, inst);
218	inst[1] &= ~((0x1f << 7) | (0x3 << 12));
219	inst[1] |= (pred << 7) | (idx << 12);
220}
221
222static INLINE void
223set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
224{
225	set_long(pc, inst);
226	inst[1] &= ~((0x3 << 4) | (1 << 6));
227	inst[1] |= (idx << 4) | (on << 6);
228}
229
230static INLINE void
231set_long(struct nv50_pc *pc, unsigned *inst)
232{
233	if (is_long(inst))
234		return;
235
236	inst[0] |= 1;
237	set_pred(pc, 0xf, 0, inst);
238	set_pred_wr(pc, 0, 0, inst);
239}
240
241static INLINE void
242set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
243{
244	if (dst->type == P_RESULT) {
245		set_long(pc, inst);
246		inst[1] |= 0x00000008;
247	}
248
249	alloc_reg(pc, dst);
250	inst[0] |= (dst->hw << 2);
251}
252
253static INLINE void
254set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
255{
256	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
257
258	set_long(pc, inst);
259	/*XXX: can't be predicated - bits overlap.. catch cases where both
260	 *     are required and avoid them. */
261	set_pred(pc, 0, 0, inst);
262	set_pred_wr(pc, 0, 0, inst);
263
264	inst[1] |= 0x00000002 | 0x00000001;
265	inst[0] |= (val & 0x3f) << 16;
266	inst[1] |= (val >> 6) << 2;
267}
268
269static void
270emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
271	    struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
272{
273	unsigned inst[2] = { 0, 0 };
274
275	inst[0] |= 0x80000000;
276	set_dst(pc, dst, inst);
277	alloc_reg(pc, iv);
278	inst[0] |= (iv->hw << 9);
279	alloc_reg(pc, src);
280	inst[0] |= (src->hw << 16);
281	if (noperspective)
282		inst[0] |= (1 << 25);
283
284	emit(pc, inst);
285}
286
287static void
288set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
289{
290	set_long(pc, inst);
291	if (src->type == P_IMMD) {
292		inst[1] |= (NV50_CB_PMISC << 22);
293	} else {
294		if (pc->p->type == NV50_PROG_VERTEX)
295			inst[1] |= (NV50_CB_PVP << 22);
296		else
297			inst[1] |= (NV50_CB_PFP << 22);
298	}
299}
300
301static void
302emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
303{
304	unsigned inst[2] = { 0, 0 };
305
306	inst[0] |= 0x10000000;
307
308	set_dst(pc, dst, inst);
309
310	if (dst->type != P_RESULT && src->type == P_IMMD) {
311		set_immd(pc, src, inst);
312		/*XXX: 32-bit, but steals part of "half" reg space - need to
313		 *     catch and handle this case if/when we do half-regs
314		 */
315		inst[0] |= 0x00008000;
316	} else
317	if (src->type == P_IMMD || src->type == P_CONST) {
318		set_long(pc, inst);
319		set_cseg(pc, src, inst);
320		inst[0] |= (src->hw << 9);
321		inst[1] |= 0x20000000; /* src0 const? */
322	} else {
323		if (src->type == P_ATTR) {
324			set_long(pc, inst);
325			inst[1] |= 0x00200000;
326		}
327
328		alloc_reg(pc, src);
329		inst[0] |= (src->hw << 9);
330	}
331
332	/* We really should support "half" instructions here at some point,
333	 * but I don't feel confident enough about them yet.
334	 */
335	set_long(pc, inst);
336	if (is_long(inst) && !is_immd(inst)) {
337		inst[1] |= 0x04000000; /* 32-bit */
338		inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
339	}
340
341	emit(pc, inst);
342}
343
344static boolean
345check_swap_src_0_1(struct nv50_pc *pc,
346		   struct nv50_reg **s0, struct nv50_reg **s1)
347{
348	struct nv50_reg *src0 = *s0, *src1 = *s1;
349
350	if (src0->type == P_CONST) {
351		if (src1->type != P_CONST) {
352			*s0 = src1;
353			*s1 = src0;
354			return TRUE;
355		}
356	} else
357	if (src1->type == P_ATTR) {
358		if (src0->type != P_ATTR) {
359			*s0 = src1;
360			*s1 = src0;
361			return TRUE;
362		}
363	}
364
365	return FALSE;
366}
367
368static void
369set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
370{
371	if (src->type == P_ATTR) {
372		set_long(pc, inst);
373		inst[1] |= 0x00200000;
374	} else
375	if (src->type == P_CONST || src->type == P_IMMD) {
376		struct nv50_reg *temp = temp_temp(pc);
377
378		emit_mov(pc, temp, src);
379		src = temp;
380	}
381
382	alloc_reg(pc, src);
383	inst[0] |= (src->hw << 9);
384}
385
386static void
387set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
388{
389	if (src->type == P_ATTR) {
390		struct nv50_reg *temp = temp_temp(pc);
391
392		emit_mov(pc, temp, src);
393		src = temp;
394	} else
395	if (src->type == P_CONST || src->type == P_IMMD) {
396		set_cseg(pc, src, inst);
397		inst[0] |= 0x00800000;
398	}
399
400	alloc_reg(pc, src);
401	inst[0] |= (src->hw << 16);
402}
403
404static void
405set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
406{
407	set_long(pc, inst);
408
409	if (src->type == P_ATTR) {
410		struct nv50_reg *temp = temp_temp(pc);
411
412		emit_mov(pc, temp, src);
413		src = temp;
414	} else
415	if (src->type == P_CONST || src->type == P_IMMD) {
416		set_cseg(pc, src, inst);
417		inst[0] |= 0x01000000;
418	}
419
420	alloc_reg(pc, src);
421	inst[1] |= (src->hw << 14);
422}
423
/*
 * Emit dst = src0 * src1 (base opcode 0xc0000000).  The operands may
 * be exchanged by check_swap_src_0_1() before encoding.
 */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
439
/*
 * Emit dst = src0 + src1 (base opcode 0xb0000000).  If the
 * instruction has become long by the time the second operand is
 * encoded, that operand goes into the src2 position, otherwise into
 * src1.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
458
/*
 * Emit a long instruction with base opcode 0xb0000000 and sub-op
 * `sub` (placed at bit 29 of the second word).  The translator passes
 * 4 for TGSI MAX and 5 for TGSI MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
476
477static void
478emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
479	 struct nv50_reg *src1)
480{
481	unsigned inst[2] = { 0, 0 };
482
483	inst[0] |= 0xb0000000;
484
485	set_long(pc, inst);
486	if (check_swap_src_0_1(pc, &src0, &src1))
487		inst[1] |= 0x04000000;
488	else
489		inst[1] |= 0x08000000;
490
491	set_dst(pc, dst, inst);
492	set_src_0(pc, src0, inst);
493	set_src_2(pc, src1, inst);
494
495	emit(pc, inst);
496}
497
/*
 * Emit dst = src0 * src1 + src2 (base opcode 0xe0000000).  The two
 * multiplicands may be exchanged by check_swap_src_0_1().
 */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
514
/*
 * Emit a single-operand instruction with base opcode 0x90000000.
 * A non-zero sub-op forces the long form and is stored at bit 29 of
 * the second word.  The translator uses sub 0 for RCP, 2 for RSQ,
 * 3 for LG2, 4 for SIN, 5 for COS and 6 for EX2.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
532
533static boolean
534nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
535{
536	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
537	struct nv50_reg *dst[4], *src[3][4], *temp;
538	unsigned mask;
539	int i, c;
540
541	NOUVEAU_ERR("insn %p\n", tok);
542
543	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
544
545	for (c = 0; c < 4; c++) {
546		if (mask & (1 << c))
547			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
548		else
549			dst[c] = NULL;
550	}
551
552	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
553		for (c = 0; c < 4; c++)
554			src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
555	}
556
557	switch (inst->Instruction.Opcode) {
558	case TGSI_OPCODE_ADD:
559		for (c = 0; c < 4; c++) {
560			if (!(mask & (1 << c)))
561				continue;
562			emit_add(pc, dst[c], src[0][c], src[1][c]);
563		}
564		break;
565	case TGSI_OPCODE_COS:
566		for (c = 0; c < 4; c++) {
567			if (!(mask & (1 << c)))
568				continue;
569			emit_flop(pc, 5, dst[c], src[0][c]);
570		}
571		break;
572	case TGSI_OPCODE_DP3:
573		temp = alloc_temp(pc, NULL);
574		emit_mul(pc, temp, src[0][0], src[1][0]);
575		emit_mad(pc, temp, src[0][1], src[1][1], temp);
576		emit_mad(pc, temp, src[0][2], src[1][2], temp);
577		for (c = 0; c < 4; c++) {
578			if (!(mask & (1 << c)))
579				continue;
580			emit_mov(pc, dst[c], temp);
581		}
582		free_temp(pc, temp);
583		break;
584	case TGSI_OPCODE_DP4:
585		temp = alloc_temp(pc, NULL);
586		emit_mul(pc, temp, src[0][0], src[1][0]);
587		emit_mad(pc, temp, src[0][1], src[1][1], temp);
588		emit_mad(pc, temp, src[0][2], src[1][2], temp);
589		emit_mad(pc, temp, src[0][3], src[1][3], temp);
590		for (c = 0; c < 4; c++) {
591			if (!(mask & (1 << c)))
592				continue;
593			emit_mov(pc, dst[c], temp);
594		}
595		free_temp(pc, temp);
596		break;
597	case TGSI_OPCODE_EX2:
598		temp = alloc_temp(pc, NULL);
599		for (c = 0; c < 4; c++) {
600			if (!(mask & (1 << c)))
601				continue;
602			{
603				unsigned inst[2] = { 0, 0 };
604				inst[0] |= 0xb0000000;
605				set_dst(pc, temp, inst);
606				set_src_0(pc, src[0][c], inst);
607				set_long(pc, inst);
608				inst[1] |= (6 << 29) | 0x00004000;
609				emit(pc, inst);
610			}
611			emit_flop(pc, 6, dst[c], temp);
612		}
613		free_temp(pc, temp);
614		break;
615	case TGSI_OPCODE_LG2:
616		for (c = 0; c < 4; c++) {
617			if (!(mask & (1 << c)))
618				continue;
619			emit_flop(pc, 3, dst[c], src[0][c]);
620		}
621		break;
622	case TGSI_OPCODE_MAD:
623		for (c = 0; c < 4; c++) {
624			if (!(mask & (1 << c)))
625				continue;
626			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
627		}
628		break;
629	case TGSI_OPCODE_MAX:
630		for (c = 0; c < 4; c++) {
631			if (!(mask & (1 << c)))
632				continue;
633			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
634		}
635		break;
636	case TGSI_OPCODE_MIN:
637		for (c = 0; c < 4; c++) {
638			if (!(mask & (1 << c)))
639				continue;
640			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
641		}
642		break;
643	case TGSI_OPCODE_MOV:
644		for (c = 0; c < 4; c++) {
645			if (!(mask & (1 << c)))
646				continue;
647			emit_mov(pc, dst[c], src[0][c]);
648		}
649		break;
650	case TGSI_OPCODE_MUL:
651		for (c = 0; c < 4; c++) {
652			if (!(mask & (1 << c)))
653				continue;
654			emit_mul(pc, dst[c], src[0][c], src[1][c]);
655		}
656		break;
657	case TGSI_OPCODE_RCP:
658		for (c = 0; c < 4; c++) {
659			if (!(mask & (1 << c)))
660				continue;
661			emit_flop(pc, 0, dst[c], src[0][c]);
662		}
663		break;
664	case TGSI_OPCODE_RSQ:
665		for (c = 0; c < 4; c++) {
666			if (!(mask & (1 << c)))
667				continue;
668			emit_flop(pc, 2, dst[c], src[0][c]);
669		}
670		break;
671	case TGSI_OPCODE_SIN:
672		for (c = 0; c < 4; c++) {
673			if (!(mask & (1 << c)))
674				continue;
675			emit_flop(pc, 4, dst[c], src[0][c]);
676		}
677		break;
678	case TGSI_OPCODE_SUB:
679		for (c = 0; c < 4; c++) {
680			if (!(mask & (1 << c)))
681				continue;
682			emit_sub(pc, dst[c], src[0][c], src[1][c]);
683		}
684		break;
685	case TGSI_OPCODE_END:
686		break;
687	default:
688		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
689		return FALSE;
690	}
691
692	kill_temp_temp(pc);
693	return TRUE;
694}
695
696static boolean
697nv50_program_tx_prep(struct nv50_pc *pc)
698{
699	struct tgsi_parse_context p;
700	boolean ret = FALSE;
701	unsigned i, c;
702
703	tgsi_parse_init(&p, pc->p->pipe.tokens);
704	while (!tgsi_parse_end_of_tokens(&p)) {
705		const union tgsi_full_token *tok = &p.FullToken;
706
707		tgsi_parse_token(&p);
708		switch (tok->Token.Type) {
709		case TGSI_TOKEN_TYPE_IMMEDIATE:
710		{
711			const struct tgsi_full_immediate *imm =
712				&p.FullToken.FullImmediate;
713
714			pc->immd_nr++;
715			pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr *
716							     sizeof(float));
717			pc->immd_buf[4 * (pc->immd_nr - 1) + 0] =
718				imm->u.ImmediateFloat32[0].Float;
719			pc->immd_buf[4 * (pc->immd_nr - 1) + 1] =
720				imm->u.ImmediateFloat32[1].Float;
721			pc->immd_buf[4 * (pc->immd_nr - 1) + 2] =
722				imm->u.ImmediateFloat32[2].Float;
723			pc->immd_buf[4 * (pc->immd_nr - 1) + 3] =
724				imm->u.ImmediateFloat32[3].Float;
725		}
726			break;
727		case TGSI_TOKEN_TYPE_DECLARATION:
728		{
729			const struct tgsi_full_declaration *d;
730			unsigned last;
731
732			d = &p.FullToken.FullDeclaration;
733			last = d->u.DeclarationRange.Last;
734
735			switch (d->Declaration.File) {
736			case TGSI_FILE_TEMPORARY:
737				if (pc->temp_nr < (last + 1))
738					pc->temp_nr = last + 1;
739				break;
740			case TGSI_FILE_OUTPUT:
741				if (pc->result_nr < (last + 1))
742					pc->result_nr = last + 1;
743				break;
744			case TGSI_FILE_INPUT:
745				if (pc->attr_nr < (last + 1))
746					pc->attr_nr = last + 1;
747				break;
748			case TGSI_FILE_CONSTANT:
749				if (pc->param_nr < (last + 1))
750					pc->param_nr = last + 1;
751				break;
752			default:
753				NOUVEAU_ERR("bad decl file %d\n",
754					    d->Declaration.File);
755				goto out_err;
756			}
757		}
758			break;
759		case TGSI_TOKEN_TYPE_INSTRUCTION:
760			break;
761		default:
762			break;
763		}
764	}
765
766	NOUVEAU_ERR("%d temps\n", pc->temp_nr);
767	if (pc->temp_nr) {
768		pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
769		if (!pc->temp)
770			goto out_err;
771
772		for (i = 0; i < pc->temp_nr; i++) {
773			for (c = 0; c < 4; c++) {
774				pc->temp[i*4+c].type = P_TEMP;
775				pc->temp[i*4+c].hw = -1;
776				pc->temp[i*4+c].index = i;
777			}
778		}
779	}
780
781	NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
782	if (pc->attr_nr) {
783		struct nv50_reg *iv = NULL, *tmp = NULL;
784		int aid = 0;
785
786		pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
787		if (!pc->attr)
788			goto out_err;
789
790		if (pc->p->type == NV50_PROG_FRAGMENT) {
791			iv = alloc_temp(pc, NULL);
792			aid++;
793		}
794
795		for (i = 0; i < pc->attr_nr; i++) {
796			struct nv50_reg *a = &pc->attr[i*4];
797
798			for (c = 0; c < 4; c++) {
799				if (pc->p->type == NV50_PROG_FRAGMENT) {
800					struct nv50_reg *at =
801						alloc_temp(pc, NULL);
802					pc->attr[i*4+c].type = at->type;
803					pc->attr[i*4+c].hw = at->hw;
804					pc->attr[i*4+c].index = at->index;
805				} else {
806					pc->p->cfg.vp.attr[aid/32] |=
807						(1 << (aid % 32));
808					pc->attr[i*4+c].type = P_ATTR;
809					pc->attr[i*4+c].hw = aid++;
810					pc->attr[i*4+c].index = i;
811				}
812			}
813
814			if (pc->p->type != NV50_PROG_FRAGMENT)
815				continue;
816
817			emit_interp(pc, iv, iv, iv, FALSE);
818			tmp = alloc_temp(pc, NULL);
819			{
820				unsigned inst[2] = { 0, 0 };
821				inst[0]  = 0x90000000;
822				inst[0] |= (tmp->hw << 2);
823				emit(pc, inst);
824			}
825			emit_interp(pc, &a[0], &a[0], tmp, TRUE);
826			emit_interp(pc, &a[1], &a[1], tmp, TRUE);
827			emit_interp(pc, &a[2], &a[2], tmp, TRUE);
828			emit_interp(pc, &a[3], &a[3], tmp, TRUE);
829			free_temp(pc, tmp);
830		}
831
832		if (iv)
833			free_temp(pc, iv);
834	}
835
836	NOUVEAU_ERR("%d result regs\n", pc->result_nr);
837	if (pc->result_nr) {
838		int rid = 0;
839
840		pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
841		if (!pc->result)
842			goto out_err;
843
844		for (i = 0; i < pc->result_nr; i++) {
845			for (c = 0; c < 4; c++) {
846				if (pc->p->type == NV50_PROG_FRAGMENT)
847					pc->result[i*4+c].type = P_TEMP;
848				else
849					pc->result[i*4+c].type = P_RESULT;
850				pc->result[i*4+c].hw = rid++;
851				pc->result[i*4+c].index = i;
852			}
853		}
854	}
855
856	NOUVEAU_ERR("%d param regs\n", pc->param_nr);
857	if (pc->param_nr) {
858		int rid = 0;
859
860		pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
861		if (!pc->param)
862			goto out_err;
863
864		for (i = 0; i < pc->param_nr; i++) {
865			for (c = 0; c < 4; c++) {
866				pc->param[i*4+c].type = P_CONST;
867				pc->param[i*4+c].hw = rid++;
868				pc->param[i*4+c].index = i;
869			}
870		}
871	}
872
873	if (pc->immd_nr) {
874		int rid = 0;
875
876		pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
877		if (!pc->immd)
878			goto out_err;
879
880		for (i = 0; i < pc->immd_nr; i++) {
881			for (c = 0; c < 4; c++) {
882				pc->immd[i*4+c].type = P_IMMD;
883				pc->immd[i*4+c].hw = rid++;
884				pc->immd[i*4+c].index = i;
885			}
886		}
887	}
888
889	ret = TRUE;
890out_err:
891	tgsi_parse_free(&p);
892	return ret;
893}
894
895static boolean
896nv50_program_tx(struct nv50_program *p)
897{
898	struct tgsi_parse_context parse;
899	struct nv50_pc *pc;
900	boolean ret;
901
902	pc = CALLOC_STRUCT(nv50_pc);
903	if (!pc)
904		return FALSE;
905	pc->p = p;
906	pc->p->cfg.high_temp = 4;
907
908	ret = nv50_program_tx_prep(pc);
909	if (ret == FALSE)
910		goto out_cleanup;
911
912	tgsi_parse_init(&parse, pc->p->pipe.tokens);
913	while (!tgsi_parse_end_of_tokens(&parse)) {
914		const union tgsi_full_token *tok = &parse.FullToken;
915
916		tgsi_parse_token(&parse);
917
918		switch (tok->Token.Type) {
919		case TGSI_TOKEN_TYPE_INSTRUCTION:
920			ret = nv50_program_tx_insn(pc, tok);
921			if (ret == FALSE)
922				goto out_err;
923			break;
924		default:
925			break;
926		}
927	}
928
929	p->immd_nr = pc->immd_nr * 4;
930	p->immd = pc->immd_buf;
931
932out_err:
933	tgsi_parse_free(&parse);
934
935out_cleanup:
936	return ret;
937}
938
939static void
940nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
941{
942	int i;
943
944	if (nv50_program_tx(p) == FALSE)
945		assert(0);
946	/* *not* sufficient, it's fine if last inst is long and
947	 * NOT immd - otherwise it's fucked fucked fucked */
948	p->insns[p->insns_nr - 1] |= 0x00000001;
949
950	if (p->type == NV50_PROG_VERTEX) {
951	for (i = 0; i < p->insns_nr; i++)
952		NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
953	} else {
954	for (i = 0; i < p->insns_nr; i++)
955		NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
956	}
957
958	p->translated = TRUE;
959}
960
961static void
962nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
963{
964	int i;
965
966	for (i = 0; i < p->immd_nr; i++) {
967		BEGIN_RING(tesla, 0x0f00, 2);
968		OUT_RING  ((NV50_CB_PMISC << 16) | (i << 8));
969		OUT_RING  (fui(p->immd[i]));
970	}
971}
972
973static void
974nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
975{
976	struct pipe_winsys *ws = nv50->pipe.winsys;
977	void *map;
978
979	if (!p->buffer)
980		p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
981	map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
982	memcpy(map, p->insns, p->insns_nr * 4);
983	ws->buffer_unmap(ws, p->buffer);
984}
985
986void
987nv50_vertprog_validate(struct nv50_context *nv50)
988{
989	struct nouveau_grobj *tesla = nv50->screen->tesla;
990	struct nv50_program *p = nv50->vertprog;
991	struct nouveau_stateobj *so;
992
993	if (!p->translated) {
994		nv50_program_validate(nv50, p);
995		if (!p->translated)
996			assert(0);
997	}
998
999	nv50_program_validate_data(nv50, p);
1000	nv50_program_validate_code(nv50, p);
1001
1002	so = so_new(11, 2);
1003	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1004	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1005		  NOUVEAU_BO_HIGH, 0, 0);
1006	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1007		  NOUVEAU_BO_LOW, 0, 0);
1008	so_method(so, tesla, 0x1650, 2);
1009	so_data  (so, p->cfg.vp.attr[0]);
1010	so_data  (so, p->cfg.vp.attr[1]);
1011	so_method(so, tesla, 0x16ac, 2);
1012	so_data  (so, 8);
1013	so_data  (so, p->cfg.high_temp);
1014	so_method(so, tesla, 0x140c, 1);
1015	so_data  (so, 0); /* program start offset */
1016	so_emit(nv50->screen->nvws, so);
1017	so_ref(NULL, &so);
1018}
1019
1020void
1021nv50_fragprog_validate(struct nv50_context *nv50)
1022{
1023	struct pipe_winsys *ws = nv50->pipe.winsys;
1024	struct nouveau_grobj *tesla = nv50->screen->tesla;
1025	struct nv50_program *p = nv50->fragprog;
1026	struct nouveau_stateobj *so;
1027	void *map;
1028
1029	if (!p->translated) {
1030		nv50_program_validate(nv50, p);
1031		if (!p->translated)
1032			assert(0);
1033	}
1034
1035	nv50_program_validate_data(nv50, p);
1036	nv50_program_validate_code(nv50, p);
1037
1038	so = so_new(7, 2);
1039	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1040	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1041		  NOUVEAU_BO_HIGH, 0, 0);
1042	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1043		  NOUVEAU_BO_LOW, 0, 0);
1044	so_method(so, tesla, 0x198c, 1);
1045	so_data  (so, p->cfg.high_temp);
1046	so_method(so, tesla, 0x1414, 1);
1047	so_data  (so, 0); /* program start offset */
1048	so_emit(nv50->screen->nvws, so);
1049	so_ref(NULL, &so);
1050}
1051
1052void
1053nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1054{
1055	struct pipe_winsys *ws = nv50->pipe.winsys;
1056
1057	if (p->insns_nr) {
1058		if (p->insns)
1059			FREE(p->insns);
1060		p->insns_nr = 0;
1061	}
1062
1063	if (p->buffer)
1064		pipe_buffer_reference(ws, &p->buffer, NULL);
1065
1066	p->translated = 0;
1067}
1068
1069