nv50_program.c revision 21e688e0a3faeef18b07c4d860bd71cc6e3ddf4a
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/util/tgsi_parse.h"
8#include "tgsi/util/tgsi_util.h"
9
10#include "nv50_context.h"
11#include "nv50_state.h"
12
13#define NV50_SU_MAX_TEMP 64
14
15/* ARL - gallium craps itself on progs/vp/arl.txt
16 *
 * MSB - Like MAD, but MUL+SUB
 * 	- Drop the dedicated op; instead introduce a way to negate args
 * 	  for ops that support it.
20 *
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * 	- Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * 	  but can emit to P_TEMP first - then MOV later. NVIDIA does this
24 *
25 * Verify half-insns work where expected - and force disable them where they
26 * don't work - MUL has it forcibly disabled atm as it fixes POW..
27 *
 * WARNING: watch dst==src vectors, writing one component can overwrite
 * 	components that are still needed as sources.
 * 	ie. SUB R0, R0.yzxw, R0
30 *
31 * MOV dst, -src
32 * 	"delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
33 * 	mov dst, tmp
34 *
35 * Things to check with renouveau:
36 * 	FP attr/result assignment - how?
37 * 		attrib
38 * 			- 0x16bc maps vp output onto fp hpos
39 * 			- 0x16c0 maps vp output onto fp col0
40 * 		result
41 * 			- colr always 0-3
42 * 			- depr always 4
43 * 0x16bc->0x16e8 --> some binding between vp/fp regs
44 * 0x16b8 --> VP output count
45 *
46 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
47 * 	      "MOV rcol.x, fcol.y" = 0x00000004
48 * 0x19a8 --> as above but 0x00000100 and 0x00000000
49 * 	- 0x00100000 used when KIL used
50 * 0x196c --> as above but 0x00000011 and 0x00000000
51 *
52 * 0x1988 --> 0xXXNNNNNN
53 * 	- XX == FP high something
54 */
/*
 * One scalar register as handled by the translator: either a single
 * component of a TGSI register or a scratch value created while emitting code.
 */
struct nv50_reg {
	/* Register file the value belongs to. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* TGSI register index; alloc_temp()/alloc_immd() store -1 here. */
	int index;

	/* Slot inside the register file; a P_TEMP with hw < 0 has no slot yet. */
	int hw;

	/* NOTE(review): not read or written by any code visible in this file chunk. */
	int neg;
};
68
69struct nv50_pc {
70	struct nv50_program *p;
71
72	/* hw resources */
73	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
74
75	/* tgsi resources */
76	struct nv50_reg *temp;
77	int temp_nr;
78	struct nv50_reg *attr;
79	int attr_nr;
80	struct nv50_reg *result;
81	int result_nr;
82	struct nv50_reg *param;
83	int param_nr;
84	struct nv50_reg *immd;
85	float *immd_buf;
86	int immd_nr;
87
88	struct nv50_reg *temp_temp[16];
89	unsigned temp_temp_nr;
90};
91
92static void
93alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
94{
95	int i;
96
97	if (reg->type != P_TEMP)
98		return;
99
100	if (reg->hw >= 0) {
101		/*XXX: do this here too to catch FP temp-as-attr usage..
102		 *     not clean, but works */
103		if (pc->p->cfg.high_temp < (reg->hw + 1))
104			pc->p->cfg.high_temp = reg->hw + 1;
105		return;
106	}
107
108	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
109		if (!(pc->r_temp[i])) {
110			pc->r_temp[i] = reg;
111			reg->hw = i;
112			if (pc->p->cfg.high_temp < (i + 1))
113				pc->p->cfg.high_temp = i + 1;
114			return;
115		}
116	}
117
118	assert(0);
119}
120
121static struct nv50_reg *
122alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
123{
124	struct nv50_reg *r;
125	int i;
126
127	if (dst && dst->type == P_TEMP && dst->hw == -1)
128		return dst;
129
130	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
131		if (!pc->r_temp[i]) {
132			r = CALLOC_STRUCT(nv50_reg);
133			r->type = P_TEMP;
134			r->index = -1;
135			r->hw = i;
136			pc->r_temp[i] = r;
137			return r;
138		}
139	}
140
141	assert(0);
142	return NULL;
143}
144
145static void
146free_temp(struct nv50_pc *pc, struct nv50_reg *r)
147{
148	if (r->index == -1) {
149		FREE(pc->r_temp[r->hw]);
150		pc->r_temp[r->hw] = NULL;
151	}
152}
153
154static struct nv50_reg *
155temp_temp(struct nv50_pc *pc)
156{
157	if (pc->temp_temp_nr >= 16)
158		assert(0);
159
160	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
161	return pc->temp_temp[pc->temp_temp_nr++];
162}
163
164static void
165kill_temp_temp(struct nv50_pc *pc)
166{
167	int i;
168
169	for (i = 0; i < pc->temp_temp_nr; i++)
170		free_temp(pc, pc->temp_temp[i]);
171	pc->temp_temp_nr = 0;
172}
173
174static int
175ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
176{
177	pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
178					     sizeof(float));
179	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
180	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
181	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
182	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
183
184	return pc->immd_nr++;
185}
186
187static struct nv50_reg *
188alloc_immd(struct nv50_pc *pc, float f)
189{
190	struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
191	unsigned hw;
192
193	hw = ctor_immd(pc, f, 0, 0, 0) * 4;
194	r->type = P_IMMD;
195	r->hw = hw;
196	r->index = -1;
197	return r;
198}
199
200static void
201emit(struct nv50_pc *pc, unsigned *inst)
202{
203	struct nv50_program *p = pc->p;
204
205	if (inst[0] & 1) {
206		p->insns_nr += 2;
207		p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
208		memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
209	} else {
210		p->insns_nr += 1;
211		p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
212		memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
213	}
214}
215
216static INLINE void set_long(struct nv50_pc *, unsigned *);
217
218static boolean
219is_long(unsigned *inst)
220{
221	if (inst[0] & 1)
222		return TRUE;
223	return FALSE;
224}
225
226static boolean
227is_immd(unsigned *inst)
228{
229	if (is_long(inst) && (inst[1] & 3) == 3)
230		return TRUE;
231	return FALSE;
232}
233
234static INLINE void
235set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
236{
237	set_long(pc, inst);
238	inst[1] &= ~((0x1f << 7) | (0x3 << 12));
239	inst[1] |= (pred << 7) | (idx << 12);
240}
241
242static INLINE void
243set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
244{
245	set_long(pc, inst);
246	inst[1] &= ~((0x3 << 4) | (1 << 6));
247	inst[1] |= (idx << 4) | (on << 6);
248}
249
250static INLINE void
251set_long(struct nv50_pc *pc, unsigned *inst)
252{
253	if (is_long(inst))
254		return;
255
256	inst[0] |= 1;
257	set_pred(pc, 0xf, 0, inst);
258	set_pred_wr(pc, 0, 0, inst);
259}
260
261static INLINE void
262set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
263{
264	if (dst->type == P_RESULT) {
265		set_long(pc, inst);
266		inst[1] |= 0x00000008;
267	}
268
269	alloc_reg(pc, dst);
270	inst[0] |= (dst->hw << 2);
271}
272
273static INLINE void
274set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
275{
276	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
277
278	set_long(pc, inst);
279	/*XXX: can't be predicated - bits overlap.. catch cases where both
280	 *     are required and avoid them. */
281	set_pred(pc, 0, 0, inst);
282	set_pred_wr(pc, 0, 0, inst);
283
284	inst[1] |= 0x00000002 | 0x00000001;
285	inst[0] |= (val & 0x3f) << 16;
286	inst[1] |= (val >> 6) << 2;
287}
288
289static void
290emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
291	    struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
292{
293	unsigned inst[2] = { 0, 0 };
294
295	inst[0] |= 0x80000000;
296	set_dst(pc, dst, inst);
297	alloc_reg(pc, iv);
298	inst[0] |= (iv->hw << 9);
299	alloc_reg(pc, src);
300	inst[0] |= (src->hw << 16);
301	if (noperspective)
302		inst[0] |= (1 << 25);
303
304	emit(pc, inst);
305}
306
307static void
308set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
309{
310	set_long(pc, inst);
311	if (src->type == P_IMMD) {
312		inst[1] |= (NV50_CB_PMISC << 22);
313	} else {
314		if (pc->p->type == PIPE_SHADER_VERTEX)
315			inst[1] |= (NV50_CB_PVP << 22);
316		else
317			inst[1] |= (NV50_CB_PFP << 22);
318	}
319}
320
321static void
322emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
323{
324	unsigned inst[2] = { 0, 0 };
325
326	inst[0] |= 0x10000000;
327
328	set_dst(pc, dst, inst);
329
330	if (dst->type != P_RESULT && src->type == P_IMMD) {
331		set_immd(pc, src, inst);
332		/*XXX: 32-bit, but steals part of "half" reg space - need to
333		 *     catch and handle this case if/when we do half-regs
334		 */
335		inst[0] |= 0x00008000;
336	} else
337	if (src->type == P_IMMD || src->type == P_CONST) {
338		set_long(pc, inst);
339		set_cseg(pc, src, inst);
340		inst[0] |= (src->hw << 9);
341		inst[1] |= 0x20000000; /* src0 const? */
342	} else {
343		if (src->type == P_ATTR) {
344			set_long(pc, inst);
345			inst[1] |= 0x00200000;
346		}
347
348		alloc_reg(pc, src);
349		inst[0] |= (src->hw << 9);
350	}
351
352	/* We really should support "half" instructions here at some point,
353	 * but I don't feel confident enough about them yet.
354	 */
355	set_long(pc, inst);
356	if (is_long(inst) && !is_immd(inst)) {
357		inst[1] |= 0x04000000; /* 32-bit */
358		inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
359	}
360
361	emit(pc, inst);
362}
363
364static boolean
365check_swap_src_0_1(struct nv50_pc *pc,
366		   struct nv50_reg **s0, struct nv50_reg **s1)
367{
368	struct nv50_reg *src0 = *s0, *src1 = *s1;
369
370	if (src0->type == P_CONST) {
371		if (src1->type != P_CONST) {
372			*s0 = src1;
373			*s1 = src0;
374			return TRUE;
375		}
376	} else
377	if (src1->type == P_ATTR) {
378		if (src0->type != P_ATTR) {
379			*s0 = src1;
380			*s1 = src0;
381			return TRUE;
382		}
383	}
384
385	return FALSE;
386}
387
388static void
389set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
390{
391	if (src->type == P_ATTR) {
392		set_long(pc, inst);
393		inst[1] |= 0x00200000;
394	} else
395	if (src->type == P_CONST || src->type == P_IMMD) {
396		struct nv50_reg *temp = temp_temp(pc);
397
398		emit_mov(pc, temp, src);
399		src = temp;
400	}
401
402	alloc_reg(pc, src);
403	inst[0] |= (src->hw << 9);
404}
405
406static void
407set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
408{
409	if (src->type == P_ATTR) {
410		struct nv50_reg *temp = temp_temp(pc);
411
412		emit_mov(pc, temp, src);
413		src = temp;
414	} else
415	if (src->type == P_CONST || src->type == P_IMMD) {
416		assert(!(inst[0] & 0x00800000));
417		if (inst[0] & 0x01000000) {
418			struct nv50_reg *temp = temp_temp(pc);
419
420			emit_mov(pc, temp, src);
421			src = temp;
422		} else {
423			set_cseg(pc, src, inst);
424			inst[0] |= 0x00800000;
425		}
426	}
427
428	alloc_reg(pc, src);
429	inst[0] |= (src->hw << 16);
430}
431
432static void
433set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
434{
435	set_long(pc, inst);
436
437	if (src->type == P_ATTR) {
438		struct nv50_reg *temp = temp_temp(pc);
439
440		emit_mov(pc, temp, src);
441		src = temp;
442	} else
443	if (src->type == P_CONST || src->type == P_IMMD) {
444		assert(!(inst[0] & 0x01000000));
445		if (inst[0] & 0x00800000) {
446			struct nv50_reg *temp = temp_temp(pc);
447
448			emit_mov(pc, temp, src);
449			src = temp;
450		} else {
451			set_cseg(pc, src, inst);
452			inst[0] |= 0x01000000;
453		}
454	}
455
456	alloc_reg(pc, src);
457	inst[1] |= (src->hw << 14);
458}
459
/*
 * Emit a multiply (opcode bits 0xc0000000), always in long form.  The
 * operands may be swapped by check_swap_src_0_1() before encoding.
 */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	set_long(pc, inst);
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
476
/*
 * Emit an add (opcode bits 0xb0000000).  The second operand is encoded in
 * the src1 field while the instruction is still short, and in the src2
 * field once encoding the destination or first operand has made it long.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);
	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
495
/*
 * Emit a long-form min/max style instruction (opcode bits 0xb0000000) with
 * the sub-opcode placed at bit 29 of the second word.  Callers in this file
 * pass sub == 4 for MAX and sub == 5 for MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);
	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
513
514static void
515emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
516	 struct nv50_reg *src1)
517{
518	unsigned inst[2] = { 0, 0 };
519
520	inst[0] |= 0xb0000000;
521
522	set_long(pc, inst);
523	if (check_swap_src_0_1(pc, &src0, &src1))
524		inst[1] |= 0x04000000;
525	else
526		inst[1] |= 0x08000000;
527
528	set_dst(pc, dst, inst);
529	set_src_0(pc, src0, inst);
530	set_src_2(pc, src1, inst);
531
532	emit(pc, inst);
533}
534
/*
 * Emit a multiply-add (opcode bits 0xe0000000) taking three sources; the
 * first two may be swapped by check_swap_src_0_1() before encoding.
 */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
551
/*
 * Emit a multiply-subtract: the mad opcode (0xe0000000) in long form with
 * modifier bit 0x08000000 set in the second word.
 */
static void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	set_long(pc, inst);
	inst[1] |= 0x08000000; /* src0 * src1 - src2 */

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
570
/*
 * Emit a single-source special-function instruction (opcode bits
 * 0x90000000).  A non-zero sub forces long form and is stored at bit 29 of
 * the second word.  Sub-opcodes used by callers in this file: 0 for RCP,
 * 2 for RSQ, 3 for LG2, 4 for SIN, 5 for COS, 6 for the final step of EX2.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
588
/*
 * Emit the preparation instruction that callers in this file issue before
 * the sub-opcode 6 flop when computing EX2/POW.  It uses opcode bits
 * 0xb0000000 in long form with sub-opcode 6 and bit 14 set in the second
 * word.
 */
static void
emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);
	set_long(pc, inst);
	/* Unsigned literal: 6 << 29 does not fit in a signed int. */
	inst[1] |= (6u << 29) | 0x00004000;

	emit(pc, inst);
}
603
604static void
605emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
606	 struct nv50_reg *src0, struct nv50_reg *src1)
607{
608	unsigned inst[2] = { 0, 0 };
609	unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
610	struct nv50_reg *rdst;
611
612	assert(c_op <= 7);
613	if (check_swap_src_0_1(pc, &src0, &src1))
614		c_op = inv_cop[c_op];
615
616	rdst = dst;
617	if (dst->type != P_TEMP)
618		dst = alloc_temp(pc, NULL);
619
620	/* set.u32 */
621	set_long(pc, inst);
622	inst[0] |= 0xb0000000;
623	inst[1] |= (3 << 29);
624	inst[1] |= (c_op << 14);
625	/*XXX: breaks things, .u32 by default?
626	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this
627	 *     doesn't seem to match what the hw actually does.
628	inst[1] |= 0x04000000; << breaks things.. .u32 by default?
629	 */
630	set_dst(pc, dst, inst);
631	set_src_0(pc, src0, inst);
632	set_src_1(pc, src1, inst);
633	emit(pc, inst);
634
635	/* cvt.f32.u32 */
636	inst[0] = 0xa0000001;
637	inst[1] = 0x64014780;
638	set_dst(pc, rdst, inst);
639	set_src_0(pc, dst, inst);
640	emit(pc, inst);
641
642	if (dst != rdst)
643		free_temp(pc, dst);
644}
645
/*
 * Emit the cvt instruction used to implement TGSI FLR: a long-form cvt
 * (opcode bits 0xa0000000, sub-opcode 6) with the modifier bits annotated
 * below.
 */
static void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* cvt */

	set_long(pc, inst);
	/* Unsigned literal: 6 << 29 does not fit in a signed int. */
	inst[1] |= (6u << 29); /* cvt */
	inst[1] |= 0x08000000; /* integer mode */
	inst[1] |= 0x04000000; /* 32 bit */
	inst[1] |= ((0x1 << 3)) << 14; /* .rn */
	inst[1] |= (1 << 14); /* src .f32 */
	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
663
/*
 * Emit dst = pow(v, e) as a four-instruction sequence through one scratch
 * temp: flop sub 3 (used for LG2 elsewhere in this file) on v, multiply by
 * e, preex2, then flop sub 6 into dst.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);
	emit_mul(pc, acc, acc, e);
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);

	free_temp(pc, acc);
}
677
/*
 * Emit dst = |src| as a long-form cvt (opcode bits 0xa0000000,
 * sub-opcode 6) with the .abs modifier set.
 */
static void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* cvt */

	set_long(pc, inst);
	/* Unsigned literal: 6 << 29 does not fit in a signed int. */
	inst[1] |= (6u << 29); /* cvt */
	inst[1] |= 0x04000000; /* 32 bit */
	inst[1] |= (1 << 14); /* src .f32 */
	inst[1] |= ((1 << 6) << 14); /* .abs */
	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
694
695static void
696emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src)
697{
698	struct nv50_reg *one = alloc_immd(pc, 1.0);
699	struct nv50_reg *zero = alloc_immd(pc, 0.0);
700	struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
701	struct nv50_reg *pos128 = alloc_immd(pc,  127.999999);
702	struct nv50_reg *tmp[4];
703
704	emit_mov(pc, dst[0], one);
705	emit_mov(pc, dst[3], one);
706
707	tmp[0] = temp_temp(pc);
708	emit_minmax(pc, 4, dst[1], src[0], zero);
709	set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]);
710
711	tmp[1] = temp_temp(pc);
712	emit_minmax(pc, 4, tmp[1], src[1], zero);
713
714	tmp[3] = temp_temp(pc);
715	emit_minmax(pc, 4, tmp[3], src[3], neg128);
716	emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
717
718	emit_pow(pc, dst[2], tmp[1], tmp[3]);
719	emit_mov(pc, dst[2], zero);
720	set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]);
721}
722
/*
 * Emit dst = -src using the long-form "delta" instruction (opcode bits
 * 0xa0000000, sub-opcode 7).
 */
static void
emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0, 0 };

	set_long(pc, inst);
	inst[0] |= 0xa0000000; /* delta */
	/* Unsigned literal: 7 << 29 does not fit in a signed int. */
	inst[1] |= (7u << 29); /* delta */
	inst[1] |= 0x04000000; /* negate arg0? probably not */
	inst[1] |= (1 << 14); /* src .f32 */
	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
738
739static struct nv50_reg *
740tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
741{
742	switch (dst->DstRegister.File) {
743	case TGSI_FILE_TEMPORARY:
744		return &pc->temp[dst->DstRegister.Index * 4 + c];
745	case TGSI_FILE_OUTPUT:
746		return &pc->result[dst->DstRegister.Index * 4 + c];
747	case TGSI_FILE_NULL:
748		return NULL;
749	default:
750		break;
751	}
752
753	return NULL;
754}
755
756static struct nv50_reg *
757tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
758{
759	struct nv50_reg *r = NULL;
760	struct nv50_reg *temp;
761	unsigned c;
762
763	c = tgsi_util_get_full_src_register_extswizzle(src, chan);
764	switch (c) {
765	case TGSI_EXTSWIZZLE_X:
766	case TGSI_EXTSWIZZLE_Y:
767	case TGSI_EXTSWIZZLE_Z:
768	case TGSI_EXTSWIZZLE_W:
769		switch (src->SrcRegister.File) {
770		case TGSI_FILE_INPUT:
771			r = &pc->attr[src->SrcRegister.Index * 4 + c];
772			break;
773		case TGSI_FILE_TEMPORARY:
774			r = &pc->temp[src->SrcRegister.Index * 4 + c];
775			break;
776		case TGSI_FILE_CONSTANT:
777			r = &pc->param[src->SrcRegister.Index * 4 + c];
778			break;
779		case TGSI_FILE_IMMEDIATE:
780			r = &pc->immd[src->SrcRegister.Index * 4 + c];
781			break;
782		default:
783			assert(0);
784			break;
785		}
786		break;
787	case TGSI_EXTSWIZZLE_ZERO:
788		r = alloc_immd(pc, 0.0);
789		break;
790	case TGSI_EXTSWIZZLE_ONE:
791		r = alloc_immd(pc, 1.0);
792		break;
793	default:
794		assert(0);
795		break;
796	}
797
798	switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
799	case TGSI_UTIL_SIGN_KEEP:
800		break;
801	case TGSI_UTIL_SIGN_CLEAR:
802		temp = temp_temp(pc);
803		emit_abs(pc, temp, r);
804		r = temp;
805		break;
806	case TGSI_UTIL_SIGN_TOGGLE:
807		temp = temp_temp(pc);
808		emit_neg(pc, temp, r);
809		r = temp;
810		break;
811	case TGSI_UTIL_SIGN_SET:
812		temp = temp_temp(pc);
813		emit_abs(pc, temp, r);
814		emit_neg(pc, temp, r);
815		r = temp;
816		break;
817	default:
818		assert(0);
819		break;
820	}
821
822	return r;
823}
824
825static boolean
826nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
827{
828	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
829	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
830	unsigned mask, sat;
831	int i, c;
832
833	NOUVEAU_ERR("insn %p\n", tok);
834
835	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
836	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
837
838	for (c = 0; c < 4; c++) {
839		if (mask & (1 << c))
840			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
841		else
842			dst[c] = NULL;
843	}
844
845	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
846		for (c = 0; c < 4; c++)
847			src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
848	}
849
850	if (sat) {
851		for (c = 0; c < 4; c++) {
852			rdst[c] = dst[c];
853			dst[c] = temp_temp(pc);
854		}
855	}
856
857	switch (inst->Instruction.Opcode) {
858	case TGSI_OPCODE_ABS:
859		for (c = 0; c < 4; c++) {
860			if (!(mask & (1 << c)))
861				continue;
862			emit_abs(pc, dst[c], src[0][c]);
863		}
864		break;
865	case TGSI_OPCODE_ADD:
866		for (c = 0; c < 4; c++) {
867			if (!(mask & (1 << c)))
868				continue;
869			emit_add(pc, dst[c], src[0][c], src[1][c]);
870		}
871		break;
872	case TGSI_OPCODE_COS:
873		for (c = 0; c < 4; c++) {
874			if (!(mask & (1 << c)))
875				continue;
876			emit_flop(pc, 5, dst[c], src[0][c]);
877		}
878		break;
879	case TGSI_OPCODE_DP3:
880		temp = alloc_temp(pc, NULL);
881		emit_mul(pc, temp, src[0][0], src[1][0]);
882		emit_mad(pc, temp, src[0][1], src[1][1], temp);
883		emit_mad(pc, temp, src[0][2], src[1][2], temp);
884		for (c = 0; c < 4; c++) {
885			if (!(mask & (1 << c)))
886				continue;
887			emit_mov(pc, dst[c], temp);
888		}
889		free_temp(pc, temp);
890		break;
891	case TGSI_OPCODE_DP4:
892		temp = alloc_temp(pc, NULL);
893		emit_mul(pc, temp, src[0][0], src[1][0]);
894		emit_mad(pc, temp, src[0][1], src[1][1], temp);
895		emit_mad(pc, temp, src[0][2], src[1][2], temp);
896		emit_mad(pc, temp, src[0][3], src[1][3], temp);
897		for (c = 0; c < 4; c++) {
898			if (!(mask & (1 << c)))
899				continue;
900			emit_mov(pc, dst[c], temp);
901		}
902		free_temp(pc, temp);
903		break;
904	case TGSI_OPCODE_DPH:
905		temp = alloc_temp(pc, NULL);
906		emit_mul(pc, temp, src[0][0], src[1][0]);
907		emit_mad(pc, temp, src[0][1], src[1][1], temp);
908		emit_mad(pc, temp, src[0][2], src[1][2], temp);
909		emit_add(pc, temp, src[1][3], temp);
910		for (c = 0; c < 4; c++) {
911			if (!(mask & (1 << c)))
912				continue;
913			emit_mov(pc, dst[c], temp);
914		}
915		free_temp(pc, temp);
916		break;
917	case TGSI_OPCODE_DST:
918	{
919		struct nv50_reg *one = alloc_immd(pc, 1.0);
920		if (mask & (1 << 0))
921			emit_mov(pc, dst[0], one);
922		if (mask & (1 << 1))
923			emit_mul(pc, dst[1], src[0][1], src[1][1]);
924		if (mask & (1 << 2))
925			emit_mov(pc, dst[2], src[0][2]);
926		if (mask & (1 << 3))
927			emit_mov(pc, dst[3], src[1][3]);
928		FREE(one);
929	}
930		break;
931	case TGSI_OPCODE_EX2:
932		temp = alloc_temp(pc, NULL);
933		for (c = 0; c < 4; c++) {
934			if (!(mask & (1 << c)))
935				continue;
936			emit_preex2(pc, temp, src[0][c]);
937			emit_flop(pc, 6, dst[c], temp);
938		}
939		free_temp(pc, temp);
940		break;
941	case TGSI_OPCODE_FLR:
942		for (c = 0; c < 4; c++) {
943			if (!(mask & (1 << c)))
944				continue;
945			emit_flr(pc, dst[c], src[0][c]);
946		}
947		break;
948	case TGSI_OPCODE_FRC:
949		temp = alloc_temp(pc, NULL);
950		for (c = 0; c < 4; c++) {
951			if (!(mask & (1 << c)))
952				continue;
953			emit_flr(pc, temp, src[0][c]);
954			emit_sub(pc, dst[c], src[0][c], temp);
955		}
956		free_temp(pc, temp);
957		break;
958	case TGSI_OPCODE_LIT:
959		/*XXX: writemask */
960		emit_lit(pc, &dst[0], &src[0][0]);
961		break;
962	case TGSI_OPCODE_LG2:
963		for (c = 0; c < 4; c++) {
964			if (!(mask & (1 << c)))
965				continue;
966			emit_flop(pc, 3, dst[c], src[0][c]);
967		}
968		break;
969	case TGSI_OPCODE_LRP:
970		for (c = 0; c < 4; c++) {
971			if (!(mask & (1 << c)))
972				continue;
973			/*XXX: we can do better than this */
974			temp = alloc_temp(pc, NULL);
975			emit_neg(pc, temp, src[0][c]);
976			emit_mad(pc, temp, temp, src[2][c], src[2][c]);
977			emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
978			free_temp(pc, temp);
979		}
980		break;
981	case TGSI_OPCODE_MAD:
982		for (c = 0; c < 4; c++) {
983			if (!(mask & (1 << c)))
984				continue;
985			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
986		}
987		break;
988	case TGSI_OPCODE_MAX:
989		for (c = 0; c < 4; c++) {
990			if (!(mask & (1 << c)))
991				continue;
992			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
993		}
994		break;
995	case TGSI_OPCODE_MIN:
996		for (c = 0; c < 4; c++) {
997			if (!(mask & (1 << c)))
998				continue;
999			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1000		}
1001		break;
1002	case TGSI_OPCODE_MOV:
1003		for (c = 0; c < 4; c++) {
1004			if (!(mask & (1 << c)))
1005				continue;
1006			emit_mov(pc, dst[c], src[0][c]);
1007		}
1008		break;
1009	case TGSI_OPCODE_MUL:
1010		for (c = 0; c < 4; c++) {
1011			if (!(mask & (1 << c)))
1012				continue;
1013			emit_mul(pc, dst[c], src[0][c], src[1][c]);
1014		}
1015		break;
1016	case TGSI_OPCODE_POW:
1017		temp = alloc_temp(pc, NULL);
1018		emit_pow(pc, temp, src[0][0], src[1][0]);
1019		for (c = 0; c < 4; c++) {
1020			if (!(mask & (1 << c)))
1021				continue;
1022			emit_mov(pc, dst[c], temp);
1023		}
1024		free_temp(pc, temp);
1025		break;
1026	case TGSI_OPCODE_RCP:
1027		for (c = 0; c < 4; c++) {
1028			if (!(mask & (1 << c)))
1029				continue;
1030			emit_flop(pc, 0, dst[c], src[0][c]);
1031		}
1032		break;
1033	case TGSI_OPCODE_RSQ:
1034		for (c = 0; c < 4; c++) {
1035			if (!(mask & (1 << c)))
1036				continue;
1037			emit_flop(pc, 2, dst[c], src[0][c]);
1038		}
1039		break;
1040	case TGSI_OPCODE_SGE:
1041		for (c = 0; c < 4; c++) {
1042			if (!(mask & (1 << c)))
1043				continue;
1044			emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1045		}
1046		break;
1047	case TGSI_OPCODE_SIN:
1048		for (c = 0; c < 4; c++) {
1049			if (!(mask & (1 << c)))
1050				continue;
1051			emit_flop(pc, 4, dst[c], src[0][c]);
1052		}
1053		break;
1054	case TGSI_OPCODE_SLT:
1055		for (c = 0; c < 4; c++) {
1056			if (!(mask & (1 << c)))
1057				continue;
1058			emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1059		}
1060		break;
1061	case TGSI_OPCODE_SUB:
1062		for (c = 0; c < 4; c++) {
1063			if (!(mask & (1 << c)))
1064				continue;
1065			emit_sub(pc, dst[c], src[0][c], src[1][c]);
1066		}
1067		break;
1068	case TGSI_OPCODE_XPD:
1069		temp = alloc_temp(pc, NULL);
1070		if (mask & (1 << 0)) {
1071			emit_mul(pc, temp, src[0][2], src[1][1]);
1072			emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1073		}
1074		if (mask & (1 << 1)) {
1075			emit_mul(pc, temp, src[0][0], src[1][2]);
1076			emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1077		}
1078		if (mask & (1 << 2)) {
1079			emit_mul(pc, temp, src[0][1], src[1][0]);
1080			emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1081		}
1082		free_temp(pc, temp);
1083		break;
1084	case TGSI_OPCODE_END:
1085		break;
1086	default:
1087		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1088		return FALSE;
1089	}
1090
1091	if (sat) {
1092		for (c = 0; c < 4; c++) {
1093			unsigned inst[2] = { 0, 0 };
1094
1095			if (!(mask & (1 << c)))
1096				continue;
1097
1098			inst[0] = 0xa0000000; /* cvt */
1099			set_long(pc, inst);
1100			inst[1] |= (6 << 29); /* cvt */
1101			inst[1] |= 0x04000000; /* 32 bit */
1102			inst[1] |= (1 << 14); /* src .f32 */
1103			inst[1] |= ((1 << 5) << 14); /* .sat */
1104			set_dst(pc, rdst[c], inst);
1105			set_src_0(pc, dst[c], inst);
1106			emit(pc, inst);
1107		}
1108	}
1109
1110	kill_temp_temp(pc);
1111	return TRUE;
1112}
1113
1114static boolean
1115nv50_program_tx_prep(struct nv50_pc *pc)
1116{
1117	struct tgsi_parse_context p;
1118	boolean ret = FALSE;
1119	unsigned i, c;
1120
1121	tgsi_parse_init(&p, pc->p->pipe.tokens);
1122	while (!tgsi_parse_end_of_tokens(&p)) {
1123		const union tgsi_full_token *tok = &p.FullToken;
1124
1125		tgsi_parse_token(&p);
1126		switch (tok->Token.Type) {
1127		case TGSI_TOKEN_TYPE_IMMEDIATE:
1128		{
1129			const struct tgsi_full_immediate *imm =
1130				&p.FullToken.FullImmediate;
1131
1132			ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1133				      imm->u.ImmediateFloat32[1].Float,
1134				      imm->u.ImmediateFloat32[2].Float,
1135				      imm->u.ImmediateFloat32[3].Float);
1136		}
1137			break;
1138		case TGSI_TOKEN_TYPE_DECLARATION:
1139		{
1140			const struct tgsi_full_declaration *d;
1141			unsigned last;
1142
1143			d = &p.FullToken.FullDeclaration;
1144			last = d->u.DeclarationRange.Last;
1145
1146			switch (d->Declaration.File) {
1147			case TGSI_FILE_TEMPORARY:
1148				if (pc->temp_nr < (last + 1))
1149					pc->temp_nr = last + 1;
1150				break;
1151			case TGSI_FILE_OUTPUT:
1152				if (pc->result_nr < (last + 1))
1153					pc->result_nr = last + 1;
1154				break;
1155			case TGSI_FILE_INPUT:
1156				if (pc->attr_nr < (last + 1))
1157					pc->attr_nr = last + 1;
1158				break;
1159			case TGSI_FILE_CONSTANT:
1160				if (pc->param_nr < (last + 1))
1161					pc->param_nr = last + 1;
1162				break;
1163			default:
1164				NOUVEAU_ERR("bad decl file %d\n",
1165					    d->Declaration.File);
1166				goto out_err;
1167			}
1168		}
1169			break;
1170		case TGSI_TOKEN_TYPE_INSTRUCTION:
1171			break;
1172		default:
1173			break;
1174		}
1175	}
1176
1177	NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1178	if (pc->temp_nr) {
1179		pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1180		if (!pc->temp)
1181			goto out_err;
1182
1183		for (i = 0; i < pc->temp_nr; i++) {
1184			for (c = 0; c < 4; c++) {
1185				pc->temp[i*4+c].type = P_TEMP;
1186				pc->temp[i*4+c].hw = -1;
1187				pc->temp[i*4+c].index = i;
1188			}
1189		}
1190	}
1191
1192	NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1193	if (pc->attr_nr) {
1194		struct nv50_reg *iv = NULL, *tmp = NULL;
1195		int aid = 0;
1196
1197		pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1198		if (!pc->attr)
1199			goto out_err;
1200
1201		if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1202			iv = alloc_temp(pc, NULL);
1203			aid++;
1204		}
1205
1206		for (i = 0; i < pc->attr_nr; i++) {
1207			struct nv50_reg *a = &pc->attr[i*4];
1208
1209			for (c = 0; c < 4; c++) {
1210				if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1211					struct nv50_reg *at =
1212						alloc_temp(pc, NULL);
1213					pc->attr[i*4+c].type = at->type;
1214					pc->attr[i*4+c].hw = at->hw;
1215					pc->attr[i*4+c].index = at->index;
1216				} else {
1217					pc->p->cfg.vp.attr[aid/32] |=
1218						(1 << (aid % 32));
1219					pc->attr[i*4+c].type = P_ATTR;
1220					pc->attr[i*4+c].hw = aid++;
1221					pc->attr[i*4+c].index = i;
1222				}
1223			}
1224
1225			if (pc->p->type != PIPE_SHADER_FRAGMENT)
1226				continue;
1227
1228			emit_interp(pc, iv, iv, iv, FALSE);
1229			tmp = alloc_temp(pc, NULL);
1230			emit_flop(pc, 0, tmp, iv);
1231			emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1232			emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1233			emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1234			emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1235			free_temp(pc, tmp);
1236		}
1237
1238		if (iv)
1239			free_temp(pc, iv);
1240	}
1241
1242	NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1243	if (pc->result_nr) {
1244		int rid = 0;
1245
1246		pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1247		if (!pc->result)
1248			goto out_err;
1249
1250		for (i = 0; i < pc->result_nr; i++) {
1251			for (c = 0; c < 4; c++) {
1252				if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1253					pc->result[i*4+c].type = P_TEMP;
1254					pc->result[i*4+c].hw = -1;
1255				} else {
1256					pc->result[i*4+c].type = P_RESULT;
1257					pc->result[i*4+c].hw = rid++;
1258				}
1259				pc->result[i*4+c].index = i;
1260			}
1261		}
1262	}
1263
1264	NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1265	if (pc->param_nr) {
1266		int rid = 0;
1267
1268		pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1269		if (!pc->param)
1270			goto out_err;
1271
1272		for (i = 0; i < pc->param_nr; i++) {
1273			for (c = 0; c < 4; c++) {
1274				pc->param[i*4+c].type = P_CONST;
1275				pc->param[i*4+c].hw = rid++;
1276				pc->param[i*4+c].index = i;
1277			}
1278		}
1279	}
1280
1281	if (pc->immd_nr) {
1282		int rid = 0;
1283
1284		pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1285		if (!pc->immd)
1286			goto out_err;
1287
1288		for (i = 0; i < pc->immd_nr; i++) {
1289			for (c = 0; c < 4; c++) {
1290				pc->immd[i*4+c].type = P_IMMD;
1291				pc->immd[i*4+c].hw = rid++;
1292				pc->immd[i*4+c].index = i;
1293			}
1294		}
1295	}
1296
1297	ret = TRUE;
1298out_err:
1299	tgsi_parse_free(&p);
1300	return ret;
1301}
1302
1303static boolean
1304nv50_program_tx(struct nv50_program *p)
1305{
1306	struct tgsi_parse_context parse;
1307	struct nv50_pc *pc;
1308	boolean ret;
1309
1310	pc = CALLOC_STRUCT(nv50_pc);
1311	if (!pc)
1312		return FALSE;
1313	pc->p = p;
1314	pc->p->cfg.high_temp = 4;
1315
1316	ret = nv50_program_tx_prep(pc);
1317	if (ret == FALSE)
1318		goto out_cleanup;
1319
1320	tgsi_parse_init(&parse, pc->p->pipe.tokens);
1321	while (!tgsi_parse_end_of_tokens(&parse)) {
1322		const union tgsi_full_token *tok = &parse.FullToken;
1323
1324		tgsi_parse_token(&parse);
1325
1326		switch (tok->Token.Type) {
1327		case TGSI_TOKEN_TYPE_INSTRUCTION:
1328			ret = nv50_program_tx_insn(pc, tok);
1329			if (ret == FALSE)
1330				goto out_err;
1331			break;
1332		default:
1333			break;
1334		}
1335	}
1336
1337	if (p->type == PIPE_SHADER_FRAGMENT) {
1338		struct nv50_reg out;
1339
1340		out.type = P_TEMP;
1341		for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1342			emit_mov(pc, &out, &pc->result[out.hw]);
1343	}
1344
1345	p->immd_nr = pc->immd_nr * 4;
1346	p->immd = pc->immd_buf;
1347
1348out_err:
1349	tgsi_parse_free(&parse);
1350
1351out_cleanup:
1352	return ret;
1353}
1354
1355static void
1356nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1357{
1358	int i;
1359
1360	if (nv50_program_tx(p) == FALSE)
1361		assert(0);
1362	/* *not* sufficient, it's fine if last inst is long and
1363	 * NOT immd - otherwise it's fucked fucked fucked */
1364	p->insns[p->insns_nr - 1] |= 0x00000001;
1365
1366	if (p->type == PIPE_SHADER_VERTEX) {
1367	for (i = 0; i < p->insns_nr; i++)
1368		NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1369	} else {
1370	for (i = 0; i < p->insns_nr; i++)
1371		NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1372	}
1373
1374	p->translated = TRUE;
1375}
1376
1377static void
1378nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1379{
1380	int i;
1381
1382	for (i = 0; i < p->immd_nr; i++) {
1383		BEGIN_RING(tesla, 0x0f00, 2);
1384		OUT_RING  ((NV50_CB_PMISC << 0) | (i << 8));
1385		OUT_RING  (fui(p->immd[i]));
1386	}
1387}
1388
1389static void
1390nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1391{
1392	struct pipe_winsys *ws = nv50->pipe.winsys;
1393	void *map;
1394
1395	if (!p->buffer)
1396		p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1397	map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1398	memcpy(map, p->insns, p->insns_nr * 4);
1399	ws->buffer_unmap(ws, p->buffer);
1400}
1401
1402void
1403nv50_vertprog_validate(struct nv50_context *nv50)
1404{
1405	struct nouveau_grobj *tesla = nv50->screen->tesla;
1406	struct nv50_program *p = nv50->vertprog;
1407	struct nouveau_stateobj *so;
1408
1409	if (!p->translated) {
1410		nv50_program_validate(nv50, p);
1411		if (!p->translated)
1412			assert(0);
1413	}
1414
1415	nv50_program_validate_data(nv50, p);
1416	nv50_program_validate_code(nv50, p);
1417
1418	so = so_new(11, 2);
1419	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1420	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1421		  NOUVEAU_BO_HIGH, 0, 0);
1422	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1423		  NOUVEAU_BO_LOW, 0, 0);
1424	so_method(so, tesla, 0x1650, 2);
1425	so_data  (so, p->cfg.vp.attr[0]);
1426	so_data  (so, p->cfg.vp.attr[1]);
1427	so_method(so, tesla, 0x16ac, 2);
1428	so_data  (so, 8);
1429	so_data  (so, p->cfg.high_temp);
1430	so_method(so, tesla, 0x140c, 1);
1431	so_data  (so, 0); /* program start offset */
1432	so_emit(nv50->screen->nvws, so);
1433	so_ref(NULL, &so);
1434}
1435
1436void
1437nv50_fragprog_validate(struct nv50_context *nv50)
1438{
1439	struct nouveau_grobj *tesla = nv50->screen->tesla;
1440	struct nv50_program *p = nv50->fragprog;
1441	struct nouveau_stateobj *so;
1442
1443	if (!p->translated) {
1444		nv50_program_validate(nv50, p);
1445		if (!p->translated)
1446			assert(0);
1447	}
1448
1449	nv50_program_validate_data(nv50, p);
1450	nv50_program_validate_code(nv50, p);
1451
1452	so = so_new(7, 2);
1453	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1454	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1455		  NOUVEAU_BO_HIGH, 0, 0);
1456	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1457		  NOUVEAU_BO_LOW, 0, 0);
1458	so_method(so, tesla, 0x198c, 1);
1459	so_data  (so, p->cfg.high_temp);
1460	so_method(so, tesla, 0x1414, 1);
1461	so_data  (so, 0); /* program start offset */
1462	so_emit(nv50->screen->nvws, so);
1463	so_ref(NULL, &so);
1464}
1465
1466void
1467nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1468{
1469	struct pipe_winsys *ws = nv50->pipe.winsys;
1470
1471	if (p->insns_nr) {
1472		if (p->insns)
1473			FREE(p->insns);
1474		p->insns_nr = 0;
1475	}
1476
1477	if (p->buffer)
1478		pipe_buffer_reference(ws, &p->buffer, NULL);
1479
1480	p->translated = 0;
1481}
1482
1483