nv50_program.c revision 52a69196c1680ff16d1ad1fc88e5869bc6055d00
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/util/tgsi_parse.h"
8#include "tgsi/util/tgsi_util.h"
9
10#include "nv50_context.h"
11#include "nv50_state.h"
12
/* Number of entries in the hardware temporary table (nv50_pc::r_temp). */
#define NV50_SU_MAX_TEMP 64
14
/*
 * One scalar register as tracked by the translator.
 */
struct nv50_reg {
	/* Register file this entry belongs to. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;
	/* TGSI register index; -1 marks a temp created by alloc_temp(). */
	int index;

	/* Hardware slot number; -1 while a temp has no slot assigned yet. */
	int hw;
	/* Not read or written by the code in this file. */
	int neg;
};
28
29struct nv50_pc {
30	struct nv50_program *p;
31
32	/* hw resources */
33	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
34
35	/* tgsi resources */
36	struct nv50_reg *temp;
37	int temp_nr;
38	struct nv50_reg *attr;
39	int attr_nr;
40	struct nv50_reg *result;
41	int result_nr;
42	struct nv50_reg *param;
43	int param_nr;
44	struct nv50_reg *immd;
45	float *immd_buf;
46	int immd_nr;
47
48	struct nv50_reg *temp_temp[8];
49	unsigned temp_temp_nr;
50};
51
52static void
53alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
54{
55	int i;
56
57	if (reg->type != P_TEMP)
58		return;
59
60	if (reg->hw >= 0) {
61		/*XXX: do this here too to catch FP temp-as-attr usage..
62		 *     not clean, but works */
63		if (pc->p->cfg.high_temp < (reg->hw + 1))
64			pc->p->cfg.high_temp = reg->hw + 1;
65		return;
66	}
67
68	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
69		if (!(pc->r_temp[i])) {
70			pc->r_temp[i] = reg;
71			reg->hw = i;
72			if (pc->p->cfg.high_temp < (i + 1))
73				pc->p->cfg.high_temp = i + 1;
74			return;
75		}
76	}
77
78	assert(0);
79}
80
81static struct nv50_reg *
82alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
83{
84	struct nv50_reg *r;
85	int i;
86
87	if (dst && dst->type == P_TEMP && dst->hw == -1)
88		return dst;
89
90	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
91		if (!pc->r_temp[i]) {
92			r = CALLOC_STRUCT(nv50_reg);
93			r->type = P_TEMP;
94			r->index = -1;
95			r->hw = i;
96			pc->r_temp[i] = r;
97			return r;
98		}
99	}
100
101	assert(0);
102	return NULL;
103}
104
105static void
106free_temp(struct nv50_pc *pc, struct nv50_reg *r)
107{
108	if (r->index == -1) {
109		FREE(pc->r_temp[r->hw]);
110		pc->r_temp[r->hw] = NULL;
111	}
112}
113
114static struct nv50_reg *
115temp_temp(struct nv50_pc *pc)
116{
117	if (pc->temp_temp_nr >= 8)
118		assert(0);
119
120	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
121	return pc->temp_temp[pc->temp_temp_nr++];
122}
123
124static void
125kill_temp_temp(struct nv50_pc *pc)
126{
127	int i;
128
129	for (i = 0; i < pc->temp_temp_nr; i++)
130		free_temp(pc, pc->temp_temp[i]);
131	pc->temp_temp_nr = 0;
132}
133
134static struct nv50_reg *
135tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
136{
137	switch (dst->DstRegister.File) {
138	case TGSI_FILE_TEMPORARY:
139		return &pc->temp[dst->DstRegister.Index * 4 + c];
140	case TGSI_FILE_OUTPUT:
141		return &pc->result[dst->DstRegister.Index * 4 + c];
142	case TGSI_FILE_NULL:
143		return NULL;
144	default:
145		break;
146	}
147
148	return NULL;
149}
150
151static struct nv50_reg *
152tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src)
153{
154	/* Handle swizzling */
155	switch (c) {
156	case 0: c = src->SrcRegister.SwizzleX; break;
157	case 1: c = src->SrcRegister.SwizzleY; break;
158	case 2: c = src->SrcRegister.SwizzleZ; break;
159	case 3: c = src->SrcRegister.SwizzleW; break;
160	default:
161		assert(0);
162	}
163
164	switch (src->SrcRegister.File) {
165	case TGSI_FILE_INPUT:
166		return &pc->attr[src->SrcRegister.Index * 4 + c];
167	case TGSI_FILE_TEMPORARY:
168		return &pc->temp[src->SrcRegister.Index * 4 + c];
169	case TGSI_FILE_CONSTANT:
170		return &pc->param[src->SrcRegister.Index * 4 + c];
171	case TGSI_FILE_IMMEDIATE:
172		return &pc->immd[src->SrcRegister.Index * 4 + c];
173	default:
174		break;
175	}
176
177	return NULL;
178}
179
180static void
181emit(struct nv50_pc *pc, unsigned *inst)
182{
183	struct nv50_program *p = pc->p;
184
185       if (inst[0] & 1) {
186               p->insns_nr += 2;
187               p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
188               memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
189       } else {
190               p->insns_nr += 1;
191               p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
192               memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
193       }
194}
195
196static INLINE void set_long(struct nv50_pc *, unsigned *);
197
198static boolean
199is_long(unsigned *inst)
200{
201	if (inst[0] & 1)
202		return TRUE;
203	return FALSE;
204}
205
206static boolean
207is_immd(unsigned *inst)
208{
209	if (is_long(inst) && (inst[1] & 3) == 3)
210		return TRUE;
211	return FALSE;
212}
213
214static INLINE void
215set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
216{
217	set_long(pc, inst);
218	inst[1] &= ~((0x1f << 7) | (0x3 << 12));
219	inst[1] |= (pred << 7) | (idx << 12);
220}
221
222static INLINE void
223set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
224{
225	set_long(pc, inst);
226	inst[1] &= ~((0x3 << 4) | (1 << 6));
227	inst[1] |= (idx << 4) | (on << 6);
228}
229
230static INLINE void
231set_long(struct nv50_pc *pc, unsigned *inst)
232{
233	if (is_long(inst))
234		return;
235
236	inst[0] |= 1;
237	set_pred(pc, 0xf, 0, inst);
238	set_pred_wr(pc, 0, 0, inst);
239}
240
241static INLINE void
242set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
243{
244	if (dst->type == P_RESULT) {
245		set_long(pc, inst);
246		inst[1] |= 0x00000008;
247	}
248
249	alloc_reg(pc, dst);
250	inst[0] |= (dst->hw << 2);
251}
252
253static INLINE void
254set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
255{
256	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
257
258	set_long(pc, inst);
259	/*XXX: can't be predicated - bits overlap.. catch cases where both
260	 *     are required and avoid them. */
261	set_pred(pc, 0, 0, inst);
262	set_pred_wr(pc, 0, 0, inst);
263
264	inst[1] |= 0x00000002 | 0x00000001;
265	inst[0] |= (val & 0x3f) << 16;
266	inst[1] |= (val >> 6) << 2;
267}
268
269static void
270emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
271	    struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
272{
273	unsigned inst[2] = { 0, 0 };
274
275	inst[0] |= 0x80000000;
276	set_dst(pc, dst, inst);
277	alloc_reg(pc, iv);
278	inst[0] |= (iv->hw << 9);
279	alloc_reg(pc, src);
280	inst[0] |= (src->hw << 16);
281	if (noperspective)
282		inst[0] |= (1 << 25);
283
284	emit(pc, inst);
285}
286
287static void
288set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
289{
290	set_long(pc, inst);
291	if (src->type == P_IMMD) {
292		inst[1] |= (NV50_CB_PMISC << 22);
293	} else {
294		if (pc->p->type == NV50_PROG_VERTEX)
295			inst[1] |= (NV50_CB_PVP << 22);
296		else
297			inst[1] |= (NV50_CB_PFP << 22);
298	}
299}
300
301static void
302emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
303{
304	unsigned inst[2] = { 0, 0 };
305
306	inst[0] |= 0x10000000;
307
308	set_dst(pc, dst, inst);
309
310	if (dst->type != P_RESULT && src->type == P_IMMD) {
311		set_immd(pc, src, inst);
312		/*XXX: 32-bit, but steals part of "half" reg space - need to
313		 *     catch and handle this case if/when we do half-regs
314		 */
315		inst[0] |= 0x00008000;
316	} else
317	if (src->type == P_IMMD || src->type == P_CONST) {
318		set_long(pc, inst);
319		set_cseg(pc, src, inst);
320		inst[0] |= (src->hw << 9);
321		inst[1] |= 0x20000000; /* src0 const? */
322	} else {
323		if (src->type == P_ATTR) {
324			set_long(pc, inst);
325			inst[1] |= 0x00200000;
326		}
327
328		alloc_reg(pc, src);
329		inst[0] |= (src->hw << 9);
330	}
331
332	/* We really should support "half" instructions here at some point,
333	 * but I don't feel confident enough about them yet.
334	 */
335	set_long(pc, inst);
336	if (is_long(inst) && !is_immd(inst)) {
337		inst[1] |= 0x04000000; /* 32-bit */
338		inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
339	}
340
341	emit(pc, inst);
342}
343
344static boolean
345check_swap_src_0_1(struct nv50_pc *pc,
346		   struct nv50_reg **s0, struct nv50_reg **s1)
347{
348	struct nv50_reg *src0 = *s0, *src1 = *s1;
349
350	if (src0->type == P_CONST) {
351		if (src1->type != P_CONST) {
352			*s0 = src1;
353			*s1 = src0;
354			return TRUE;
355		}
356	} else
357	if (src1->type == P_ATTR) {
358		if (src0->type != P_ATTR) {
359			*s0 = src1;
360			*s1 = src0;
361			return TRUE;
362		}
363	}
364
365	return FALSE;
366}
367
368static void
369set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
370{
371	if (src->type == P_ATTR) {
372		set_long(pc, inst);
373		inst[1] |= 0x00200000;
374	} else
375	if (src->type == P_CONST || src->type == P_IMMD) {
376		struct nv50_reg *temp = temp_temp(pc);
377
378		emit_mov(pc, temp, src);
379		src = temp;
380	}
381
382	alloc_reg(pc, src);
383	inst[0] |= (src->hw << 9);
384}
385
386static void
387set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
388{
389	if (src->type == P_ATTR) {
390		struct nv50_reg *temp = temp_temp(pc);
391
392		emit_mov(pc, temp, src);
393		src = temp;
394	} else
395	if (src->type == P_CONST || src->type == P_IMMD) {
396		set_cseg(pc, src, inst);
397		inst[0] |= 0x00800000;
398	}
399
400	alloc_reg(pc, src);
401	inst[0] |= (src->hw << 16);
402}
403
404static void
405set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
406{
407	set_long(pc, inst);
408
409	if (src->type == P_ATTR) {
410		struct nv50_reg *temp = temp_temp(pc);
411
412		emit_mov(pc, temp, src);
413		src = temp;
414	} else
415	if (src->type == P_CONST || src->type == P_IMMD) {
416		set_cseg(pc, src, inst);
417		inst[0] |= 0x01000000;
418	}
419
420	alloc_reg(pc, src);
421	inst[1] |= (src->hw << 14);
422}
423
/* Emit dst = src0 * src1 (opcode bits 0xc0000000). */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	/* Operand order does not matter here, so the result is ignored. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
439
/*
 * Emit dst = src0 + src1 (opcode bits 0xb0000000).  The second operand
 * goes into the src1 field of a short instruction and into the src2
 * field once the instruction has become long.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
458
/*
 * Emit a long instruction with opcode bits 0xb0000000 and sub stored at
 * bit 29 of the second word; callers in this file pass 4 for MAX and 5
 * for MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
476
/*
 * Emit dst = src0 - src1 as a long instruction with opcode bits
 * 0xb0000000.  A different bit of the second word is set depending on
 * whether the operands had to be exchanged.
 */
static void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);

	inst[1] |= check_swap_src_0_1(pc, &src0, &src1) ? 0x04000000
							: 0x08000000;

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_2(pc, src1, inst);

	emit(pc, inst);
}
497
/* Emit dst = src0 * src1 + src2 (opcode bits 0xe0000000). */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	/* Only the two multiplicands may trade places. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
514
/*
 * Emit a one-operand instruction with opcode bits 0x90000000.  A
 * non-zero sub makes the instruction long and is stored at bit 29 of
 * the second word; sub 0 (used for RCP in this file) stays short unless
 * an operand forces the long form.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
532
533static boolean
534nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
535{
536	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
537	struct nv50_reg *dst[4], *src[3][4], *temp;
538	unsigned mask;
539	int i, c;
540
541	NOUVEAU_ERR("insn %p\n", tok);
542
543	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
544
545	for (c = 0; c < 4; c++) {
546		if (mask & (1 << c))
547			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
548		else
549			dst[c] = NULL;
550	}
551
552	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
553		for (c = 0; c < 4; c++)
554			src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
555	}
556
557	switch (inst->Instruction.Opcode) {
558	case TGSI_OPCODE_ADD:
559		for (c = 0; c < 4; c++)
560			emit_add(pc, dst[c], src[0][c], src[1][c]);
561		break;
562	case TGSI_OPCODE_COS:
563		for (c = 0; c < 4; c++)
564			emit_flop(pc, 5, dst[c], src[0][c]);
565		break;
566	case TGSI_OPCODE_DP3:
567		temp = alloc_temp(pc, NULL);
568		emit_mul(pc, temp, src[0][0], src[1][0]);
569		emit_mad(pc, temp, src[0][1], src[1][1], temp);
570		emit_mad(pc, temp, src[0][2], src[1][2], temp);
571		for (c = 0; c < 4; c++)
572			emit_mov(pc, dst[c], temp);
573		free_temp(pc, temp);
574		break;
575	case TGSI_OPCODE_DP4:
576		temp = alloc_temp(pc, NULL);
577		emit_mul(pc, temp, src[0][0], src[1][0]);
578		emit_mad(pc, temp, src[0][1], src[1][1], temp);
579		emit_mad(pc, temp, src[0][2], src[1][2], temp);
580		emit_mad(pc, temp, src[0][3], src[1][3], temp);
581		for (c = 0; c < 4; c++)
582			emit_mov(pc, dst[c], temp);
583		free_temp(pc, temp);
584		break;
585	case TGSI_OPCODE_EX2:
586		for (c = 0; c < 4; c++)
587			emit_flop(pc, 6, dst[c], src[0][c]);
588		break;
589	case TGSI_OPCODE_LG2:
590		for (c = 0; c < 4; c++)
591			emit_flop(pc, 3, dst[c], src[0][c]);
592		break;
593	case TGSI_OPCODE_MAD:
594		for (c = 0; c < 4; c++)
595			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
596		break;
597	case TGSI_OPCODE_MAX:
598		for (c = 0; c < 4; c++)
599			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
600		break;
601	case TGSI_OPCODE_MIN:
602		for (c = 0; c < 4; c++)
603			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
604		break;
605	case TGSI_OPCODE_MOV:
606		for (c = 0; c < 4; c++)
607			emit_mov(pc, dst[c], src[0][c]);
608		break;
609	case TGSI_OPCODE_MUL:
610		for (c = 0; c < 4; c++)
611			emit_mul(pc, dst[c], src[0][c], src[1][c]);
612		break;
613	case TGSI_OPCODE_RCP:
614		for (c = 0; c < 4; c++)
615			emit_flop(pc, 0, dst[c], src[0][c]);
616		break;
617	case TGSI_OPCODE_RSQ:
618		for (c = 0; c < 4; c++)
619			emit_flop(pc, 2, dst[c], src[0][c]);
620		break;
621	case TGSI_OPCODE_SIN:
622		for (c = 0; c < 4; c++)
623			emit_flop(pc, 4, dst[c], src[0][c]);
624		break;
625	case TGSI_OPCODE_SUB:
626		for (c = 0; c < 4; c++)
627			emit_sub(pc, dst[c], src[0][c], src[1][c]);
628		break;
629	case TGSI_OPCODE_END:
630		break;
631	default:
632		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
633		return FALSE;
634	}
635
636	kill_temp_temp(pc);
637	return TRUE;
638}
639
640static boolean
641nv50_program_tx_prep(struct nv50_pc *pc)
642{
643	struct tgsi_parse_context p;
644	boolean ret = FALSE;
645	unsigned i, c;
646
647	tgsi_parse_init(&p, pc->p->pipe.tokens);
648	while (!tgsi_parse_end_of_tokens(&p)) {
649		const union tgsi_full_token *tok = &p.FullToken;
650
651		tgsi_parse_token(&p);
652		switch (tok->Token.Type) {
653		case TGSI_TOKEN_TYPE_IMMEDIATE:
654		{
655			const struct tgsi_full_immediate *imm =
656				&p.FullToken.FullImmediate;
657
658			pc->immd_nr++;
659			pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr *
660							     sizeof(float));
661			pc->immd_buf[4 * (pc->immd_nr - 1) + 0] =
662				imm->u.ImmediateFloat32[0].Float;
663			pc->immd_buf[4 * (pc->immd_nr - 1) + 1] =
664				imm->u.ImmediateFloat32[1].Float;
665			pc->immd_buf[4 * (pc->immd_nr - 1) + 2] =
666				imm->u.ImmediateFloat32[2].Float;
667			pc->immd_buf[4 * (pc->immd_nr - 1) + 3] =
668				imm->u.ImmediateFloat32[3].Float;
669		}
670			break;
671		case TGSI_TOKEN_TYPE_DECLARATION:
672		{
673			const struct tgsi_full_declaration *d;
674			unsigned last;
675
676			d = &p.FullToken.FullDeclaration;
677			last = d->u.DeclarationRange.Last;
678
679			switch (d->Declaration.File) {
680			case TGSI_FILE_TEMPORARY:
681				if (pc->temp_nr < (last + 1))
682					pc->temp_nr = last + 1;
683				break;
684			case TGSI_FILE_OUTPUT:
685				if (pc->result_nr < (last + 1))
686					pc->result_nr = last + 1;
687				break;
688			case TGSI_FILE_INPUT:
689				if (pc->attr_nr < (last + 1))
690					pc->attr_nr = last + 1;
691				break;
692			case TGSI_FILE_CONSTANT:
693				if (pc->param_nr < (last + 1))
694					pc->param_nr = last + 1;
695				break;
696			default:
697				NOUVEAU_ERR("bad decl file %d\n",
698					    d->Declaration.File);
699				goto out_err;
700			}
701		}
702			break;
703		case TGSI_TOKEN_TYPE_INSTRUCTION:
704			break;
705		default:
706			break;
707		}
708	}
709
710	NOUVEAU_ERR("%d temps\n", pc->temp_nr);
711	if (pc->temp_nr) {
712		pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
713		if (!pc->temp)
714			goto out_err;
715
716		for (i = 0; i < pc->temp_nr; i++) {
717			for (c = 0; c < 4; c++) {
718				pc->temp[i*4+c].type = P_TEMP;
719				pc->temp[i*4+c].hw = -1;
720				pc->temp[i*4+c].index = i;
721			}
722		}
723	}
724
725	NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
726	if (pc->attr_nr) {
727		struct nv50_reg *iv = NULL, *tmp = NULL;
728		int aid = 0;
729
730		pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
731		if (!pc->attr)
732			goto out_err;
733
734		if (pc->p->type == NV50_PROG_FRAGMENT) {
735			iv = alloc_temp(pc, NULL);
736			aid++;
737		}
738
739		for (i = 0; i < pc->attr_nr; i++) {
740			struct nv50_reg *a = &pc->attr[i*4];
741
742			for (c = 0; c < 4; c++) {
743				if (pc->p->type == NV50_PROG_FRAGMENT) {
744					struct nv50_reg *at =
745						alloc_temp(pc, NULL);
746					pc->attr[i*4+c].type = at->type;
747					pc->attr[i*4+c].hw = at->hw;
748					pc->attr[i*4+c].index = at->index;
749				} else {
750					pc->p->cfg.vp.attr[aid/32] |=
751						(1 << (aid % 32));
752					pc->attr[i*4+c].type = P_ATTR;
753					pc->attr[i*4+c].hw = aid++;
754					pc->attr[i*4+c].index = i;
755				}
756			}
757
758			if (pc->p->type != NV50_PROG_FRAGMENT)
759				continue;
760
761			emit_interp(pc, iv, iv, iv, FALSE);
762			tmp = alloc_temp(pc, NULL);
763			{
764				unsigned inst[2] = { 0, 0 };
765				inst[0]  = 0x90000000;
766				inst[0] |= (tmp->hw << 2);
767				emit(pc, inst);
768			}
769			emit_interp(pc, &a[0], &a[0], tmp, TRUE);
770			emit_interp(pc, &a[1], &a[1], tmp, TRUE);
771			emit_interp(pc, &a[2], &a[2], tmp, TRUE);
772			emit_interp(pc, &a[3], &a[3], tmp, TRUE);
773			free_temp(pc, tmp);
774		}
775
776		if (iv)
777			free_temp(pc, iv);
778	}
779
780	NOUVEAU_ERR("%d result regs\n", pc->result_nr);
781	if (pc->result_nr) {
782		int rid = 0;
783
784		pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
785		if (!pc->result)
786			goto out_err;
787
788		for (i = 0; i < pc->result_nr; i++) {
789			for (c = 0; c < 4; c++) {
790				if (pc->p->type == NV50_PROG_FRAGMENT)
791					pc->result[i*4+c].type = P_TEMP;
792				else
793					pc->result[i*4+c].type = P_RESULT;
794				pc->result[i*4+c].hw = rid++;
795				pc->result[i*4+c].index = i;
796			}
797		}
798	}
799
800	NOUVEAU_ERR("%d param regs\n", pc->param_nr);
801	if (pc->param_nr) {
802		int rid = 0;
803
804		pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
805		if (!pc->param)
806			goto out_err;
807
808		for (i = 0; i < pc->param_nr; i++) {
809			for (c = 0; c < 4; c++) {
810				pc->param[i*4+c].type = P_CONST;
811				pc->param[i*4+c].hw = rid++;
812				pc->param[i*4+c].index = i;
813			}
814		}
815	}
816
817	if (pc->immd_nr) {
818		int rid = 0;
819
820		pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
821		if (!pc->immd)
822			goto out_err;
823
824		for (i = 0; i < pc->immd_nr; i++) {
825			for (c = 0; c < 4; c++) {
826				pc->immd[i*4+c].type = P_IMMD;
827				pc->immd[i*4+c].hw = rid++;
828				pc->immd[i*4+c].index = i;
829			}
830		}
831	}
832
833	ret = TRUE;
834out_err:
835	tgsi_parse_free(&p);
836	return ret;
837}
838
839static boolean
840nv50_program_tx(struct nv50_program *p)
841{
842	struct tgsi_parse_context parse;
843	struct nv50_pc *pc;
844	boolean ret;
845
846	pc = CALLOC_STRUCT(nv50_pc);
847	if (!pc)
848		return FALSE;
849	pc->p = p;
850	pc->p->cfg.high_temp = 4;
851
852	ret = nv50_program_tx_prep(pc);
853	if (ret == FALSE)
854		goto out_cleanup;
855
856	tgsi_parse_init(&parse, pc->p->pipe.tokens);
857	while (!tgsi_parse_end_of_tokens(&parse)) {
858		const union tgsi_full_token *tok = &parse.FullToken;
859
860		tgsi_parse_token(&parse);
861
862		switch (tok->Token.Type) {
863		case TGSI_TOKEN_TYPE_INSTRUCTION:
864			ret = nv50_program_tx_insn(pc, tok);
865			if (ret == FALSE)
866				goto out_err;
867			break;
868		default:
869			break;
870		}
871	}
872
873	p->immd_nr = pc->immd_nr * 4;
874	p->immd = pc->immd_buf;
875
876out_err:
877	tgsi_parse_free(&parse);
878
879out_cleanup:
880	return ret;
881}
882
883static void
884nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
885{
886	int i;
887
888	if (nv50_program_tx(p) == FALSE)
889		assert(0);
890	/* *not* sufficient, it's fine if last inst is long and
891	 * NOT immd - otherwise it's fucked fucked fucked */
892	p->insns[p->insns_nr - 1] |= 0x00000001;
893
894	if (p->type == NV50_PROG_VERTEX) {
895	for (i = 0; i < p->insns_nr; i++)
896		NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
897	} else {
898	for (i = 0; i < p->insns_nr; i++)
899		NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
900	}
901
902	p->translated = TRUE;
903}
904
905static void
906nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
907{
908	int i;
909
910	for (i = 0; i < p->immd_nr; i++) {
911		BEGIN_RING(tesla, 0x0f00, 2);
912		OUT_RING  ((NV50_CB_PMISC << 16) | (i << 8));
913		OUT_RING  (fui(p->immd[i]));
914	}
915}
916
917static void
918nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
919{
920	struct pipe_winsys *ws = nv50->pipe.winsys;
921	void *map;
922
923	if (!p->buffer)
924		p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
925	map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
926	memcpy(map, p->insns, p->insns_nr * 4);
927	ws->buffer_unmap(ws, p->buffer);
928}
929
930void
931nv50_vertprog_validate(struct nv50_context *nv50)
932{
933	struct nouveau_grobj *tesla = nv50->screen->tesla;
934	struct nv50_program *p = nv50->vertprog;
935	struct nouveau_stateobj *so;
936
937	if (!p->translated) {
938		nv50_program_validate(nv50, p);
939		if (!p->translated)
940			assert(0);
941	}
942
943	nv50_program_validate_data(nv50, p);
944	nv50_program_validate_code(nv50, p);
945
946	so = so_new(11, 2);
947	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
948	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
949		  NOUVEAU_BO_HIGH, 0, 0);
950	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
951		  NOUVEAU_BO_LOW, 0, 0);
952	so_method(so, tesla, 0x1650, 2);
953	so_data  (so, p->cfg.vp.attr[0]);
954	so_data  (so, p->cfg.vp.attr[1]);
955	so_method(so, tesla, 0x16ac, 2);
956	so_data  (so, 8);
957	so_data  (so, p->cfg.high_temp);
958	so_method(so, tesla, 0x140c, 1);
959	so_data  (so, 0); /* program start offset */
960	so_emit(nv50->screen->nvws, so);
961	so_ref(NULL, &so);
962}
963
964void
965nv50_fragprog_validate(struct nv50_context *nv50)
966{
967	struct pipe_winsys *ws = nv50->pipe.winsys;
968	struct nouveau_grobj *tesla = nv50->screen->tesla;
969	struct nv50_program *p = nv50->fragprog;
970	struct nouveau_stateobj *so;
971	void *map;
972
973	if (!p->translated) {
974		nv50_program_validate(nv50, p);
975		if (!p->translated)
976			assert(0);
977	}
978
979	nv50_program_validate_data(nv50, p);
980	nv50_program_validate_code(nv50, p);
981
982	so = so_new(7, 2);
983	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
984	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
985		  NOUVEAU_BO_HIGH, 0, 0);
986	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
987		  NOUVEAU_BO_LOW, 0, 0);
988	so_method(so, tesla, 0x198c, 1);
989	so_data  (so, p->cfg.high_temp);
990	so_method(so, tesla, 0x1414, 1);
991	so_data  (so, 0); /* program start offset */
992	so_emit(nv50->screen->nvws, so);
993	so_ref(NULL, &so);
994}
995
996void
997nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
998{
999	struct pipe_winsys *ws = nv50->pipe.winsys;
1000
1001	if (p->insns_nr) {
1002		if (p->insns)
1003			FREE(p->insns);
1004		p->insns_nr = 0;
1005	}
1006
1007	if (p->buffer)
1008		pipe_buffer_reference(ws, &p->buffer, NULL);
1009
1010	p->translated = 0;
1011}
1012
1013