nv50_program.c revision b5bbf09c42a9d563984fad875ced5c4814033a3d
#include <stdlib.h>

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
#include "tgsi/util/tgsi_util.h"

#include "nv50_context.h"
#include "nv50_state.h"
12
13#define NV50_SU_MAX_TEMP 64
14
/* Known issues and TODO:
 *
 * ARL - gallium fails on progs/vp/arl.txt
 *
 * MSB - Like MAD, but MUL+SUB
 * 	- Remove it, and introduce a general way to negate args for ops
 * 	  that support it.
 *
 * Look into inlining IMMD for ops other than MOV (make it general?)
 * 	- Maybe even relax restrictions a bit: P_RESULT + P_IMMD can't be
 * 	  encoded, but we can emit to P_TEMP first - then MOV later.
 * 	  NVIDIA does this.
 *
 * Verify half-insns work where expected - and force disable them where they
 * don't work - MUL has them forcibly disabled at the moment as that fixes
 * POW.
 *
 * Watch out for dst==src vectors: a write can overwrite components that are
 * still needed.
 * 	ie. SUB R0, R0.yzxw, R0
 *
 * Things to check with renouveau:
 * 	SGE/SLT with needed src0/1 swap
 * 	FP attr/result assignment - how?
 * 	FP/VP constbuf usage
 */
/* One scalar operand (a single vector component) as tracked by the
 * translator. */
struct nv50_reg {
	/* Register file the operand lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;
	/* Index of the TGSI register this component belongs to; -1 marks a
	 * register created by the translator itself (see alloc_temp() and
	 * alloc_immd()). */
	int index;

	/* Hardware slot. -1 means a P_TEMP that has not been given a slot yet
	 * (see alloc_reg()); for P_IMMD it is the offset of the value inside
	 * nv50_pc::immd_buf (see set_immd()). */
	int hw;
	/* Not read or written by the code visible in this part of the file. */
	int neg;
};
49
/* Translation state for one TGSI program. */
struct nv50_pc {
	/* Program being built; receives the instruction words and config. */
	struct nv50_program *p;

	/* hw resources */
	/* Owner of each hardware temp slot, NULL when the slot is free. */
	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];

	/* tgsi resources */
	/* Each array below holds 4 entries (one per component) per TGSI
	 * register; the matching *_nr field counts TGSI registers. */
	struct nv50_reg *temp;
	int temp_nr;
	struct nv50_reg *attr;
	int attr_nr;
	struct nv50_reg *result;
	int result_nr;
	struct nv50_reg *param;
	int param_nr;
	struct nv50_reg *immd;
	/* Immediate values, 4 floats per entry; grown by ctor_immd(). */
	float *immd_buf;
	int immd_nr;

	/* Scratch registers handed out by temp_temp() and released together
	 * by kill_temp_temp(). */
	struct nv50_reg *temp_temp[8];
	unsigned temp_temp_nr;
};
72
73static void
74alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
75{
76	int i;
77
78	if (reg->type != P_TEMP)
79		return;
80
81	if (reg->hw >= 0) {
82		/*XXX: do this here too to catch FP temp-as-attr usage..
83		 *     not clean, but works */
84		if (pc->p->cfg.high_temp < (reg->hw + 1))
85			pc->p->cfg.high_temp = reg->hw + 1;
86		return;
87	}
88
89	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
90		if (!(pc->r_temp[i])) {
91			pc->r_temp[i] = reg;
92			reg->hw = i;
93			if (pc->p->cfg.high_temp < (i + 1))
94				pc->p->cfg.high_temp = i + 1;
95			return;
96		}
97	}
98
99	assert(0);
100}
101
102static struct nv50_reg *
103alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
104{
105	struct nv50_reg *r;
106	int i;
107
108	if (dst && dst->type == P_TEMP && dst->hw == -1)
109		return dst;
110
111	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
112		if (!pc->r_temp[i]) {
113			r = CALLOC_STRUCT(nv50_reg);
114			r->type = P_TEMP;
115			r->index = -1;
116			r->hw = i;
117			pc->r_temp[i] = r;
118			return r;
119		}
120	}
121
122	assert(0);
123	return NULL;
124}
125
126static void
127free_temp(struct nv50_pc *pc, struct nv50_reg *r)
128{
129	if (r->index == -1) {
130		FREE(pc->r_temp[r->hw]);
131		pc->r_temp[r->hw] = NULL;
132	}
133}
134
135static struct nv50_reg *
136temp_temp(struct nv50_pc *pc)
137{
138	if (pc->temp_temp_nr >= 8)
139		assert(0);
140
141	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
142	return pc->temp_temp[pc->temp_temp_nr++];
143}
144
145static void
146kill_temp_temp(struct nv50_pc *pc)
147{
148	int i;
149
150	for (i = 0; i < pc->temp_temp_nr; i++)
151		free_temp(pc, pc->temp_temp[i]);
152	pc->temp_temp_nr = 0;
153}
154
155static int
156ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
157{
158	pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
159					     sizeof(float));
160	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
161	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
162	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
163	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
164
165	return pc->immd_nr++;
166}
167
168static struct nv50_reg *
169alloc_immd(struct nv50_pc *pc, float f)
170{
171	struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
172	unsigned hw;
173
174	hw = ctor_immd(pc, f, 0, 0, 0) * 4;
175	r->type = P_IMMD;
176	r->hw = hw;
177	r->index = -1;
178	return r;
179}
180
181static void
182emit(struct nv50_pc *pc, unsigned *inst)
183{
184	struct nv50_program *p = pc->p;
185
186	if (inst[0] & 1) {
187		p->insns_nr += 2;
188		p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
189		memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
190	} else {
191		p->insns_nr += 1;
192		p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
193		memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
194	}
195}
196
197static INLINE void set_long(struct nv50_pc *, unsigned *);
198
199static boolean
200is_long(unsigned *inst)
201{
202	if (inst[0] & 1)
203		return TRUE;
204	return FALSE;
205}
206
207static boolean
208is_immd(unsigned *inst)
209{
210	if (is_long(inst) && (inst[1] & 3) == 3)
211		return TRUE;
212	return FALSE;
213}
214
215static INLINE void
216set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
217{
218	set_long(pc, inst);
219	inst[1] &= ~((0x1f << 7) | (0x3 << 12));
220	inst[1] |= (pred << 7) | (idx << 12);
221}
222
223static INLINE void
224set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
225{
226	set_long(pc, inst);
227	inst[1] &= ~((0x3 << 4) | (1 << 6));
228	inst[1] |= (idx << 4) | (on << 6);
229}
230
231static INLINE void
232set_long(struct nv50_pc *pc, unsigned *inst)
233{
234	if (is_long(inst))
235		return;
236
237	inst[0] |= 1;
238	set_pred(pc, 0xf, 0, inst);
239	set_pred_wr(pc, 0, 0, inst);
240}
241
242static INLINE void
243set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
244{
245	if (dst->type == P_RESULT) {
246		set_long(pc, inst);
247		inst[1] |= 0x00000008;
248	}
249
250	alloc_reg(pc, dst);
251	inst[0] |= (dst->hw << 2);
252}
253
254static INLINE void
255set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
256{
257	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
258
259	set_long(pc, inst);
260	/*XXX: can't be predicated - bits overlap.. catch cases where both
261	 *     are required and avoid them. */
262	set_pred(pc, 0, 0, inst);
263	set_pred_wr(pc, 0, 0, inst);
264
265	inst[1] |= 0x00000002 | 0x00000001;
266	inst[0] |= (val & 0x3f) << 16;
267	inst[1] |= (val >> 6) << 2;
268}
269
270static void
271emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
272	    struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
273{
274	unsigned inst[2] = { 0, 0 };
275
276	inst[0] |= 0x80000000;
277	set_dst(pc, dst, inst);
278	alloc_reg(pc, iv);
279	inst[0] |= (iv->hw << 9);
280	alloc_reg(pc, src);
281	inst[0] |= (src->hw << 16);
282	if (noperspective)
283		inst[0] |= (1 << 25);
284
285	emit(pc, inst);
286}
287
288static void
289set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
290{
291	set_long(pc, inst);
292	if (src->type == P_IMMD) {
293		inst[1] |= (NV50_CB_PMISC << 22);
294	} else {
295		if (pc->p->type == PIPE_SHADER_VERTEX)
296			inst[1] |= (NV50_CB_PVP << 22);
297		else
298			inst[1] |= (NV50_CB_PFP << 22);
299	}
300}
301
302static void
303emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
304{
305	unsigned inst[2] = { 0, 0 };
306
307	inst[0] |= 0x10000000;
308
309	set_dst(pc, dst, inst);
310
311	if (dst->type != P_RESULT && src->type == P_IMMD) {
312		set_immd(pc, src, inst);
313		/*XXX: 32-bit, but steals part of "half" reg space - need to
314		 *     catch and handle this case if/when we do half-regs
315		 */
316		inst[0] |= 0x00008000;
317	} else
318	if (src->type == P_IMMD || src->type == P_CONST) {
319		set_long(pc, inst);
320		set_cseg(pc, src, inst);
321		inst[0] |= (src->hw << 9);
322		inst[1] |= 0x20000000; /* src0 const? */
323	} else {
324		if (src->type == P_ATTR) {
325			set_long(pc, inst);
326			inst[1] |= 0x00200000;
327		}
328
329		alloc_reg(pc, src);
330		inst[0] |= (src->hw << 9);
331	}
332
333	/* We really should support "half" instructions here at some point,
334	 * but I don't feel confident enough about them yet.
335	 */
336	set_long(pc, inst);
337	if (is_long(inst) && !is_immd(inst)) {
338		inst[1] |= 0x04000000; /* 32-bit */
339		inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
340	}
341
342	emit(pc, inst);
343}
344
345static boolean
346check_swap_src_0_1(struct nv50_pc *pc,
347		   struct nv50_reg **s0, struct nv50_reg **s1)
348{
349	struct nv50_reg *src0 = *s0, *src1 = *s1;
350
351	if (src0->type == P_CONST) {
352		if (src1->type != P_CONST) {
353			*s0 = src1;
354			*s1 = src0;
355			return TRUE;
356		}
357	} else
358	if (src1->type == P_ATTR) {
359		if (src0->type != P_ATTR) {
360			*s0 = src1;
361			*s1 = src0;
362			return TRUE;
363		}
364	}
365
366	return FALSE;
367}
368
369static void
370set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
371{
372	if (src->type == P_ATTR) {
373		set_long(pc, inst);
374		inst[1] |= 0x00200000;
375	} else
376	if (src->type == P_CONST || src->type == P_IMMD) {
377		struct nv50_reg *temp = temp_temp(pc);
378
379		emit_mov(pc, temp, src);
380		src = temp;
381	}
382
383	alloc_reg(pc, src);
384	inst[0] |= (src->hw << 9);
385}
386
387static void
388set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
389{
390	if (src->type == P_ATTR) {
391		struct nv50_reg *temp = temp_temp(pc);
392
393		emit_mov(pc, temp, src);
394		src = temp;
395	} else
396	if (src->type == P_CONST || src->type == P_IMMD) {
397		assert(!(inst[0] & 0x00800000));
398		if (inst[0] & 0x01000000) {
399			struct nv50_reg *temp = temp_temp(pc);
400
401			emit_mov(pc, temp, src);
402			src = temp;
403		} else {
404			set_cseg(pc, src, inst);
405			inst[0] |= 0x00800000;
406		}
407	}
408
409	alloc_reg(pc, src);
410	inst[0] |= (src->hw << 16);
411}
412
413static void
414set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
415{
416	set_long(pc, inst);
417
418	if (src->type == P_ATTR) {
419		struct nv50_reg *temp = temp_temp(pc);
420
421		emit_mov(pc, temp, src);
422		src = temp;
423	} else
424	if (src->type == P_CONST || src->type == P_IMMD) {
425		assert(!(inst[0] & 0x01000000));
426		if (inst[0] & 0x00800000) {
427			struct nv50_reg *temp = temp_temp(pc);
428
429			emit_mov(pc, temp, src);
430			src = temp;
431		} else {
432			set_cseg(pc, src, inst);
433			inst[0] |= 0x01000000;
434		}
435	}
436
437	alloc_reg(pc, src);
438	inst[1] |= (src->hw << 14);
439}
440
/* Emit "dst = src0 * src1" (opcode bits 0xc0000000), always in long form.
 * The sources may be exchanged by check_swap_src_0_1(). */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_long(pc, inst);
	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
457
/* Emit "dst = src0 + src1" (opcode bits 0xb0000000). The second operand is
 * encoded in the source-2 field when the instruction has become long while
 * setting dst/src0, and in the source-1 field otherwise. */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);
	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
476
/* Emit a two-source operation from the 0xb0000000 group, always in long
 * form, with sub placed at bit 29 of the second word. The translator uses
 * sub 4 for MAX and sub 5 for MIN. */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);
	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
494
495static void
496emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
497	 struct nv50_reg *src1)
498{
499	unsigned inst[2] = { 0, 0 };
500
501	inst[0] |= 0xb0000000;
502
503	set_long(pc, inst);
504	if (check_swap_src_0_1(pc, &src0, &src1))
505		inst[1] |= 0x04000000;
506	else
507		inst[1] |= 0x08000000;
508
509	set_dst(pc, dst, inst);
510	set_src_0(pc, src0, inst);
511	set_src_2(pc, src1, inst);
512
513	emit(pc, inst);
514}
515
/* Emit "dst = src0 * src1 + src2" (opcode bits 0xe0000000). The two
 * multiplicands may be exchanged by check_swap_src_0_1(). */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
532
/* Emit "dst = src0 * src1 - src2": the MAD encoding (0xe0000000) in long
 * form with bit 0x08000000 of the second word set. */
static void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	set_long(pc, inst);
	inst[1] |= 0x08000000; /* src0 * src1 - src2 */

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
551
/* Emit a single-source operation from the 0x90000000 group. sub selects
 * the function; the translator uses 0 for RCP, 2 for RSQ, 3 for LG2, 4 for
 * SIN, 5 for COS and 6 for EX2. A non-zero sub needs the long encoding
 * because it is stored at bit 29 of the second word. */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
569
/* Emit the single-source instruction the translator issues on an operand
 * before the EX2 flop (see the EX2 and POW handling): opcode bits
 * 0xb0000000, long form, with (6 << 29) | 0x00004000 in the second word. */
static void
emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	set_long(pc, inst);
	inst[1] |= (6 << 29) | 0x00004000;

	emit(pc, inst);
}
584
585/*XXX: inaccurate results.. why? */
586#define ALLOW_SET_SWAP 0
587
588static void
589emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
590	 struct nv50_reg *src0, struct nv50_reg *src1)
591{
592	unsigned inst[2] = { 0, 0 };
593#if ALLOW_SET_SWAP
594	unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
595#endif
596	struct nv50_reg *rdst;
597
598#if ALLOW_SET_SWAP
599	assert(c_op <= 7);
600	if (check_swap_src_0_1(pc, &src0, &src1))
601		c_op = inv_cop[c_op];
602#endif
603
604	rdst = dst;
605	if (dst->type != P_TEMP)
606		dst = alloc_temp(pc, NULL);
607
608	/* set.u32 */
609	set_long(pc, inst);
610	inst[0] |= 0xb0000000;
611	inst[1] |= (3 << 29);
612	inst[1] |= (c_op << 14);
613	/*XXX: breaks things, .u32 by default?
614	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this
615	 *     doesn't seem to match what the hw actually does.
616	inst[1] |= 0x04000000; << breaks things.. .u32 by default?
617	 */
618	set_dst(pc, dst, inst);
619	set_src_0(pc, src0, inst);
620	set_src_1(pc, src1, inst);
621	emit(pc, inst);
622
623	/* cvt.f32.u32 */
624	inst[0] = 0xa0000001;
625	inst[1] = 0x64014780;
626	set_dst(pc, rdst, inst);
627	set_src_0(pc, dst, inst);
628	emit(pc, inst);
629
630	if (dst != rdst)
631		free_temp(pc, dst);
632}
633
/* Emit the conversion instruction the translator uses for FLR: a long cvt
 * with the integer-mode, 32-bit, rounding and f32-source bits set. */
static void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000 /* cvt */, 0 };

	set_long(pc, inst);

	inst[1] |= (6 << 29)		/* cvt */
		|  0x08000000		/* integer mode */
		|  0x04000000		/* 32 bit */
		|  (((0x1 << 3)) << 14)	/* .rn */
		|  (1 << 14);		/* src .f32 */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
651
/* Emit "dst = v ^ e", computed as EX2(e * LG2(v)) through a scratch temp
 * that is released before returning. */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);	/* acc = lg2(v) */
	emit_mul(pc, acc, acc, e);	/* acc = e * lg2(v) */
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);	/* dst = ex2(acc) */

	free_temp(pc, acc);
}
665
/* Emit "dst = |src|": a long cvt instruction with the 32-bit, f32-source
 * and .abs bits set. */
static void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000 /* cvt */, 0 };

	set_long(pc, inst);

	inst[1] |= (6 << 29)		/* cvt */
		|  0x04000000		/* 32 bit */
		|  (1 << 14)		/* src .f32 */
		|  ((1 << 6) << 14);	/* .abs */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
682
683static void
684emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src)
685{
686	struct nv50_reg *one = alloc_immd(pc, 1.0);
687	struct nv50_reg *zero = alloc_immd(pc, 0.0);
688	struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
689	struct nv50_reg *pos128 = alloc_immd(pc,  127.999999);
690	struct nv50_reg *tmp[4];
691
692	emit_mov(pc, dst[0], one);
693	emit_mov(pc, dst[3], one);
694
695	tmp[0] = temp_temp(pc);
696	emit_minmax(pc, 4, dst[1], src[0], zero);
697	set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]);
698
699	tmp[1] = temp_temp(pc);
700	emit_minmax(pc, 4, tmp[1], src[1], zero);
701
702	tmp[3] = temp_temp(pc);
703	emit_minmax(pc, 4, tmp[3], src[3], neg128);
704	emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
705
706	emit_pow(pc, dst[2], tmp[1], tmp[3]);
707	emit_mov(pc, dst[2], zero);
708	set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]);
709}
710
711static struct nv50_reg *
712tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
713{
714	switch (dst->DstRegister.File) {
715	case TGSI_FILE_TEMPORARY:
716		return &pc->temp[dst->DstRegister.Index * 4 + c];
717	case TGSI_FILE_OUTPUT:
718		return &pc->result[dst->DstRegister.Index * 4 + c];
719	case TGSI_FILE_NULL:
720		return NULL;
721	default:
722		break;
723	}
724
725	return NULL;
726}
727
728static struct nv50_reg *
729tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
730{
731	struct nv50_reg *r = NULL;
732	struct nv50_reg *temp;
733	unsigned c;
734
735	c = tgsi_util_get_full_src_register_extswizzle(src, chan);
736	switch (c) {
737	case TGSI_EXTSWIZZLE_X:
738	case TGSI_EXTSWIZZLE_Y:
739	case TGSI_EXTSWIZZLE_Z:
740	case TGSI_EXTSWIZZLE_W:
741		switch (src->SrcRegister.File) {
742		case TGSI_FILE_INPUT:
743			r = &pc->attr[src->SrcRegister.Index * 4 + c];
744			break;
745		case TGSI_FILE_TEMPORARY:
746			r = &pc->temp[src->SrcRegister.Index * 4 + c];
747			break;
748		case TGSI_FILE_CONSTANT:
749			r = &pc->param[src->SrcRegister.Index * 4 + c];
750			break;
751		case TGSI_FILE_IMMEDIATE:
752			r = &pc->immd[src->SrcRegister.Index * 4 + c];
753			break;
754		default:
755			assert(0);
756			break;
757		}
758		break;
759	case TGSI_EXTSWIZZLE_ZERO:
760		r = alloc_immd(pc, 0.0);
761		break;
762	case TGSI_EXTSWIZZLE_ONE:
763		r = alloc_immd(pc, 1.0);
764		break;
765	default:
766		assert(0);
767		break;
768	}
769
770	switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
771	case TGSI_UTIL_SIGN_KEEP:
772		break;
773	case TGSI_UTIL_SIGN_CLEAR:
774		temp = temp_temp(pc);
775		emit_abs(pc, temp, r);
776		r = temp;
777		break;
778	default:
779		assert(0);
780		break;
781	}
782
783	return r;
784}
785
786static boolean
787nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
788{
789	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
790	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
791	unsigned mask, sat;
792	int i, c;
793
794	NOUVEAU_ERR("insn %p\n", tok);
795
796	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
797	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
798
799	for (c = 0; c < 4; c++) {
800		if (mask & (1 << c))
801			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
802		else
803			dst[c] = NULL;
804	}
805
806	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
807		for (c = 0; c < 4; c++)
808			src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
809	}
810
811	if (sat) {
812		for (c = 0; c < 4; c++) {
813			rdst[c] = dst[c];
814			dst[c] = temp_temp(pc);
815		}
816	}
817
818	switch (inst->Instruction.Opcode) {
819	case TGSI_OPCODE_ABS:
820		for (c = 0; c < 4; c++) {
821			if (!(mask & (1 << c)))
822				continue;
823			emit_abs(pc, dst[c], src[0][c]);
824		}
825		break;
826	case TGSI_OPCODE_ADD:
827		for (c = 0; c < 4; c++) {
828			if (!(mask & (1 << c)))
829				continue;
830			emit_add(pc, dst[c], src[0][c], src[1][c]);
831		}
832		break;
833	case TGSI_OPCODE_COS:
834		for (c = 0; c < 4; c++) {
835			if (!(mask & (1 << c)))
836				continue;
837			emit_flop(pc, 5, dst[c], src[0][c]);
838		}
839		break;
840	case TGSI_OPCODE_DP3:
841		temp = alloc_temp(pc, NULL);
842		emit_mul(pc, temp, src[0][0], src[1][0]);
843		emit_mad(pc, temp, src[0][1], src[1][1], temp);
844		emit_mad(pc, temp, src[0][2], src[1][2], temp);
845		for (c = 0; c < 4; c++) {
846			if (!(mask & (1 << c)))
847				continue;
848			emit_mov(pc, dst[c], temp);
849		}
850		free_temp(pc, temp);
851		break;
852	case TGSI_OPCODE_DP4:
853		temp = alloc_temp(pc, NULL);
854		emit_mul(pc, temp, src[0][0], src[1][0]);
855		emit_mad(pc, temp, src[0][1], src[1][1], temp);
856		emit_mad(pc, temp, src[0][2], src[1][2], temp);
857		emit_mad(pc, temp, src[0][3], src[1][3], temp);
858		for (c = 0; c < 4; c++) {
859			if (!(mask & (1 << c)))
860				continue;
861			emit_mov(pc, dst[c], temp);
862		}
863		free_temp(pc, temp);
864		break;
865	case TGSI_OPCODE_DPH:
866		temp = alloc_temp(pc, NULL);
867		emit_mul(pc, temp, src[0][0], src[1][0]);
868		emit_mad(pc, temp, src[0][1], src[1][1], temp);
869		emit_mad(pc, temp, src[0][2], src[1][2], temp);
870		emit_add(pc, temp, src[1][3], temp);
871		for (c = 0; c < 4; c++) {
872			if (!(mask & (1 << c)))
873				continue;
874			emit_mov(pc, dst[c], temp);
875		}
876		free_temp(pc, temp);
877		break;
878	case TGSI_OPCODE_DST:
879	{
880		struct nv50_reg *one = alloc_immd(pc, 1.0);
881		if (mask & (1 << 0))
882			emit_mov(pc, dst[0], one);
883		if (mask & (1 << 1))
884			emit_mul(pc, dst[1], src[0][1], src[1][1]);
885		if (mask & (1 << 2))
886			emit_mov(pc, dst[2], src[0][2]);
887		if (mask & (1 << 3))
888			emit_mov(pc, dst[3], src[1][3]);
889		FREE(one);
890	}
891		break;
892	case TGSI_OPCODE_EX2:
893		temp = alloc_temp(pc, NULL);
894		for (c = 0; c < 4; c++) {
895			if (!(mask & (1 << c)))
896				continue;
897			emit_preex2(pc, temp, src[0][c]);
898			emit_flop(pc, 6, dst[c], temp);
899		}
900		free_temp(pc, temp);
901		break;
902	case TGSI_OPCODE_FLR:
903		for (c = 0; c < 4; c++) {
904			if (!(mask & (1 << c)))
905				continue;
906			emit_flr(pc, dst[c], src[0][c]);
907		}
908		break;
909	case TGSI_OPCODE_FRC:
910		temp = alloc_temp(pc, NULL);
911		for (c = 0; c < 4; c++) {
912			if (!(mask & (1 << c)))
913				continue;
914			emit_flr(pc, temp, src[0][c]);
915			emit_sub(pc, dst[c], src[0][c], temp);
916		}
917		free_temp(pc, temp);
918		break;
919	case TGSI_OPCODE_LIT:
920		/*XXX: writemask */
921		emit_lit(pc, &dst[0], &src[0][0]);
922		break;
923	case TGSI_OPCODE_LG2:
924		for (c = 0; c < 4; c++) {
925			if (!(mask & (1 << c)))
926				continue;
927			emit_flop(pc, 3, dst[c], src[0][c]);
928		}
929		break;
930	case TGSI_OPCODE_MAD:
931		for (c = 0; c < 4; c++) {
932			if (!(mask & (1 << c)))
933				continue;
934			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
935		}
936		break;
937	case TGSI_OPCODE_MAX:
938		for (c = 0; c < 4; c++) {
939			if (!(mask & (1 << c)))
940				continue;
941			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
942		}
943		break;
944	case TGSI_OPCODE_MIN:
945		for (c = 0; c < 4; c++) {
946			if (!(mask & (1 << c)))
947				continue;
948			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
949		}
950		break;
951	case TGSI_OPCODE_MOV:
952		for (c = 0; c < 4; c++) {
953			if (!(mask & (1 << c)))
954				continue;
955			emit_mov(pc, dst[c], src[0][c]);
956		}
957		break;
958	case TGSI_OPCODE_MUL:
959		for (c = 0; c < 4; c++) {
960			if (!(mask & (1 << c)))
961				continue;
962			emit_mul(pc, dst[c], src[0][c], src[1][c]);
963		}
964		break;
965	case TGSI_OPCODE_POW:
966		temp = alloc_temp(pc, NULL);
967		emit_pow(pc, temp, src[0][0], src[1][0]);
968		for (c = 0; c < 4; c++) {
969			if (!(mask & (1 << c)))
970				continue;
971			emit_mov(pc, dst[c], temp);
972		}
973		free_temp(pc, temp);
974		break;
975	case TGSI_OPCODE_RCP:
976		for (c = 0; c < 4; c++) {
977			if (!(mask & (1 << c)))
978				continue;
979			emit_flop(pc, 0, dst[c], src[0][c]);
980		}
981		break;
982	case TGSI_OPCODE_RSQ:
983		for (c = 0; c < 4; c++) {
984			if (!(mask & (1 << c)))
985				continue;
986			emit_flop(pc, 2, dst[c], src[0][c]);
987		}
988		break;
989	case TGSI_OPCODE_SGE:
990		for (c = 0; c < 4; c++) {
991			if (!(mask & (1 << c)))
992				continue;
993			emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
994		}
995		break;
996	case TGSI_OPCODE_SIN:
997		for (c = 0; c < 4; c++) {
998			if (!(mask & (1 << c)))
999				continue;
1000			emit_flop(pc, 4, dst[c], src[0][c]);
1001		}
1002		break;
1003	case TGSI_OPCODE_SLT:
1004		for (c = 0; c < 4; c++) {
1005			if (!(mask & (1 << c)))
1006				continue;
1007			emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1008		}
1009		break;
1010	case TGSI_OPCODE_SUB:
1011		for (c = 0; c < 4; c++) {
1012			if (!(mask & (1 << c)))
1013				continue;
1014			emit_sub(pc, dst[c], src[0][c], src[1][c]);
1015		}
1016		break;
1017	case TGSI_OPCODE_XPD:
1018		temp = alloc_temp(pc, NULL);
1019		if (mask & (1 << 0)) {
1020			emit_mul(pc, temp, src[0][2], src[1][1]);
1021			emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1022		}
1023		if (mask & (1 << 1)) {
1024			emit_mul(pc, temp, src[0][0], src[1][2]);
1025			emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1026		}
1027		if (mask & (1 << 2)) {
1028			emit_mul(pc, temp, src[0][1], src[1][0]);
1029			emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1030		}
1031		free_temp(pc, temp);
1032		break;
1033	case TGSI_OPCODE_END:
1034		break;
1035	default:
1036		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1037		return FALSE;
1038	}
1039
1040	if (sat) {
1041		for (c = 0; c < 4; c++) {
1042			unsigned inst[2] = { 0, 0 };
1043
1044			if (!(mask & (1 << c)))
1045				continue;
1046
1047			inst[0] = 0xa0000000; /* cvt */
1048			set_long(pc, inst);
1049			inst[1] |= (6 << 29); /* cvt */
1050			inst[1] |= 0x04000000; /* 32 bit */
1051			inst[1] |= (1 << 14); /* src .f32 */
1052			inst[1] |= ((1 << 5) << 14); /* .sat */
1053			set_dst(pc, rdst[c], inst);
1054			set_src_0(pc, dst[c], inst);
1055			emit(pc, inst);
1056		}
1057	}
1058
1059	kill_temp_temp(pc);
1060	return TRUE;
1061}
1062
/* First pass over the TGSI tokens: collect the immediates, find out how
 * many registers each file needs, and build the per-component register
 * tables in pc (4 entries per TGSI register).
 *
 * For fragment programs this also emits the input interpolation code, so it
 * must run before any instruction is translated.
 *
 * Returns TRUE on success; FALSE on an unsupported declaration file or when
 * an allocation fails. Tables allocated before a failure are left in pc. */
static boolean
nv50_program_tx_prep(struct nv50_pc *pc)
{
	struct tgsi_parse_context p;
	boolean ret = FALSE;
	unsigned i, c;

	tgsi_parse_init(&p, pc->p->pipe.tokens);
	while (!tgsi_parse_end_of_tokens(&p)) {
		const union tgsi_full_token *tok = &p.FullToken;

		tgsi_parse_token(&p);
		switch (tok->Token.Type) {
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			const struct tgsi_full_immediate *imm =
				&p.FullToken.FullImmediate;

			/* Immediates are stored in declaration order, so the
			 * buffer index matches the TGSI immediate index. */
			ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
				      imm->u.ImmediateFloat32[1].Float,
				      imm->u.ImmediateFloat32[2].Float,
				      imm->u.ImmediateFloat32[3].Float);
		}
			break;
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *d;
			unsigned last;

			d = &p.FullToken.FullDeclaration;
			last = d->u.DeclarationRange.Last;

			/* Track the highest declared index + 1 per file. */
			switch (d->Declaration.File) {
			case TGSI_FILE_TEMPORARY:
				if (pc->temp_nr < (last + 1))
					pc->temp_nr = last + 1;
				break;
			case TGSI_FILE_OUTPUT:
				if (pc->result_nr < (last + 1))
					pc->result_nr = last + 1;
				break;
			case TGSI_FILE_INPUT:
				if (pc->attr_nr < (last + 1))
					pc->attr_nr = last + 1;
				break;
			case TGSI_FILE_CONSTANT:
				if (pc->param_nr < (last + 1))
					pc->param_nr = last + 1;
				break;
			default:
				NOUVEAU_ERR("bad decl file %d\n",
					    d->Declaration.File);
				goto out_err;
			}
		}
			break;
		case TGSI_TOKEN_TYPE_INSTRUCTION:
			break;
		default:
			break;
		}
	}

	/* TGSI temporaries: hardware slots are assigned lazily (hw == -1). */
	NOUVEAU_ERR("%d temps\n", pc->temp_nr);
	if (pc->temp_nr) {
		pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
		if (!pc->temp)
			goto out_err;

		for (i = 0; i < pc->temp_nr; i++) {
			for (c = 0; c < 4; c++) {
				pc->temp[i*4+c].type = P_TEMP;
				pc->temp[i*4+c].hw = -1;
				pc->temp[i*4+c].index = i;
			}
		}
	}

	NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
	if (pc->attr_nr) {
		struct nv50_reg *iv = NULL, *tmp = NULL;
		int aid = 0;

		pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
		if (!pc->attr)
			goto out_err;

		if (pc->p->type == PIPE_SHADER_FRAGMENT) {
			/* Scratch register used as the interpolation
			 * operand below. */
			iv = alloc_temp(pc, NULL);
			aid++;
		}

		for (i = 0; i < pc->attr_nr; i++) {
			struct nv50_reg *a = &pc->attr[i*4];

			for (c = 0; c < 4; c++) {
				if (pc->p->type == PIPE_SHADER_FRAGMENT) {
					/* Fragment inputs live in temps: the
					 * allocated register keeps its slot
					 * reserved in pc->r_temp, and the attr
					 * entry copies its fields. */
					struct nv50_reg *at =
						alloc_temp(pc, NULL);
					pc->attr[i*4+c].type = at->type;
					pc->attr[i*4+c].hw = at->hw;
					pc->attr[i*4+c].index = at->index;
				} else {
					/* Vertex inputs: consecutive attribute
					 * slots, each recorded in the
					 * cfg.vp.attr bitmask. */
					pc->p->cfg.vp.attr[aid/32] |=
						(1 << (aid % 32));
					pc->attr[i*4+c].type = P_ATTR;
					pc->attr[i*4+c].hw = aid++;
					pc->attr[i*4+c].index = i;
				}
			}

			if (pc->p->type != PIPE_SHADER_FRAGMENT)
				continue;

			emit_interp(pc, iv, iv, iv, FALSE);
			tmp = alloc_temp(pc, NULL);
			{
				/* Same opcode word emit_flop() builds with
				 * sub == 0 (the value used for RCP), written
				 * to tmp; the source field is left at 0 -
				 * presumably hw register 0, i.e. iv. TODO
				 * confirm. */
				unsigned inst[2] = { 0, 0 };
				inst[0]  = 0x90000000;
				inst[0] |= (tmp->hw << 2);
				emit(pc, inst);
			}
			/* Interpolate the four components in place. */
			emit_interp(pc, &a[0], &a[0], tmp, TRUE);
			emit_interp(pc, &a[1], &a[1], tmp, TRUE);
			emit_interp(pc, &a[2], &a[2], tmp, TRUE);
			emit_interp(pc, &a[3], &a[3], tmp, TRUE);
			free_temp(pc, tmp);
		}

		if (iv)
			free_temp(pc, iv);
	}

	NOUVEAU_ERR("%d result regs\n", pc->result_nr);
	if (pc->result_nr) {
		int rid = 0;

		pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
		if (!pc->result)
			goto out_err;

		for (i = 0; i < pc->result_nr; i++) {
			for (c = 0; c < 4; c++) {
				if (pc->p->type == PIPE_SHADER_FRAGMENT) {
					/* Fragment outputs are computed in
					 * lazily placed temps. */
					pc->result[i*4+c].type = P_TEMP;
					pc->result[i*4+c].hw = -1;
				} else {
					pc->result[i*4+c].type = P_RESULT;
					pc->result[i*4+c].hw = rid++;
				}
				pc->result[i*4+c].index = i;
			}
		}
	}

	NOUVEAU_ERR("%d param regs\n", pc->param_nr);
	if (pc->param_nr) {
		int rid = 0;

		pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
		if (!pc->param)
			goto out_err;

		for (i = 0; i < pc->param_nr; i++) {
			for (c = 0; c < 4; c++) {
				pc->param[i*4+c].type = P_CONST;
				pc->param[i*4+c].hw = rid++;
				pc->param[i*4+c].index = i;
			}
		}
	}

	if (pc->immd_nr) {
		int rid = 0;

		pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
		if (!pc->immd)
			goto out_err;

		/* hw is the offset of the component inside pc->immd_buf. */
		for (i = 0; i < pc->immd_nr; i++) {
			for (c = 0; c < 4; c++) {
				pc->immd[i*4+c].type = P_IMMD;
				pc->immd[i*4+c].hw = rid++;
				pc->immd[i*4+c].index = i;
			}
		}
	}

	ret = TRUE;
out_err:
	tgsi_parse_free(&p);
	return ret;
}
1256
1257static boolean
1258nv50_program_tx(struct nv50_program *p)
1259{
1260	struct tgsi_parse_context parse;
1261	struct nv50_pc *pc;
1262	boolean ret;
1263
1264	pc = CALLOC_STRUCT(nv50_pc);
1265	if (!pc)
1266		return FALSE;
1267	pc->p = p;
1268	pc->p->cfg.high_temp = 4;
1269
1270	ret = nv50_program_tx_prep(pc);
1271	if (ret == FALSE)
1272		goto out_cleanup;
1273
1274	tgsi_parse_init(&parse, pc->p->pipe.tokens);
1275	while (!tgsi_parse_end_of_tokens(&parse)) {
1276		const union tgsi_full_token *tok = &parse.FullToken;
1277
1278		tgsi_parse_token(&parse);
1279
1280		switch (tok->Token.Type) {
1281		case TGSI_TOKEN_TYPE_INSTRUCTION:
1282			ret = nv50_program_tx_insn(pc, tok);
1283			if (ret == FALSE)
1284				goto out_err;
1285			break;
1286		default:
1287			break;
1288		}
1289	}
1290
1291	if (p->type == PIPE_SHADER_FRAGMENT) {
1292		struct nv50_reg out;
1293
1294		out.type = P_TEMP;
1295		for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1296			emit_mov(pc, &out, &pc->result[out.hw]);
1297	}
1298
1299	p->immd_nr = pc->immd_nr * 4;
1300	p->immd = pc->immd_buf;
1301
1302out_err:
1303	tgsi_parse_free(&parse);
1304
1305out_cleanup:
1306	return ret;
1307}
1308
1309static void
1310nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1311{
1312	int i;
1313
1314	if (nv50_program_tx(p) == FALSE)
1315		assert(0);
1316	/* *not* sufficient, it's fine if last inst is long and
1317	 * NOT immd - otherwise it's fucked fucked fucked */
1318	p->insns[p->insns_nr - 1] |= 0x00000001;
1319
1320	if (p->type == PIPE_SHADER_VERTEX) {
1321	for (i = 0; i < p->insns_nr; i++)
1322		NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1323	} else {
1324	for (i = 0; i < p->insns_nr; i++)
1325		NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1326	}
1327
1328	p->translated = TRUE;
1329}
1330
1331static void
1332nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1333{
1334	int i;
1335
1336	for (i = 0; i < p->immd_nr; i++) {
1337		BEGIN_RING(tesla, 0x0f00, 2);
1338		OUT_RING  ((NV50_CB_PMISC << 16) | (i << 8));
1339		OUT_RING  (fui(p->immd[i]));
1340	}
1341}
1342
1343static void
1344nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1345{
1346	struct pipe_winsys *ws = nv50->pipe.winsys;
1347	void *map;
1348
1349	if (!p->buffer)
1350		p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1351	map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1352	memcpy(map, p->insns, p->insns_nr * 4);
1353	ws->buffer_unmap(ws, p->buffer);
1354}
1355
1356void
1357nv50_vertprog_validate(struct nv50_context *nv50)
1358{
1359	struct nouveau_grobj *tesla = nv50->screen->tesla;
1360	struct nv50_program *p = nv50->vertprog;
1361	struct nouveau_stateobj *so;
1362
1363	if (!p->translated) {
1364		nv50_program_validate(nv50, p);
1365		if (!p->translated)
1366			assert(0);
1367	}
1368
1369	nv50_program_validate_data(nv50, p);
1370	nv50_program_validate_code(nv50, p);
1371
1372	so = so_new(11, 2);
1373	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1374	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1375		  NOUVEAU_BO_HIGH, 0, 0);
1376	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1377		  NOUVEAU_BO_LOW, 0, 0);
1378	so_method(so, tesla, 0x1650, 2);
1379	so_data  (so, p->cfg.vp.attr[0]);
1380	so_data  (so, p->cfg.vp.attr[1]);
1381	so_method(so, tesla, 0x16ac, 2);
1382	so_data  (so, 8);
1383	so_data  (so, p->cfg.high_temp);
1384	so_method(so, tesla, 0x140c, 1);
1385	so_data  (so, 0); /* program start offset */
1386	so_emit(nv50->screen->nvws, so);
1387	so_ref(NULL, &so);
1388}
1389
1390void
1391nv50_fragprog_validate(struct nv50_context *nv50)
1392{
1393	struct nouveau_grobj *tesla = nv50->screen->tesla;
1394	struct nv50_program *p = nv50->fragprog;
1395	struct nouveau_stateobj *so;
1396
1397	if (!p->translated) {
1398		nv50_program_validate(nv50, p);
1399		if (!p->translated)
1400			assert(0);
1401	}
1402
1403	nv50_program_validate_data(nv50, p);
1404	nv50_program_validate_code(nv50, p);
1405
1406	so = so_new(7, 2);
1407	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1408	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1409		  NOUVEAU_BO_HIGH, 0, 0);
1410	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1411		  NOUVEAU_BO_LOW, 0, 0);
1412	so_method(so, tesla, 0x198c, 1);
1413	so_data  (so, p->cfg.high_temp);
1414	so_method(so, tesla, 0x1414, 1);
1415	so_data  (so, 0); /* program start offset */
1416	so_emit(nv50->screen->nvws, so);
1417	so_ref(NULL, &so);
1418}
1419
1420void
1421nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1422{
1423	struct pipe_winsys *ws = nv50->pipe.winsys;
1424
1425	if (p->insns_nr) {
1426		if (p->insns)
1427			FREE(p->insns);
1428		p->insns_nr = 0;
1429	}
1430
1431	if (p->buffer)
1432		pipe_buffer_reference(ws, &p->buffer, NULL);
1433
1434	p->translated = 0;
1435}
1436
1437