nv50_program.c revision 619549a6377a58d54c9cf55f8863beed56b09566
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/util/tgsi_parse.h"
8#include "tgsi/util/tgsi_util.h"
9
10#include "nv50_context.h"
11
12#define NV50_SU_MAX_TEMP 64
13#define NV50_PROGRAM_DUMP
14
15/* ARL - gallium craps itself on progs/vp/arl.txt
16 *
17 * MSB - Like MAD, but MUL+SUB
18 * 	- Fuck it off, introduce a way to negate args for ops that
19 * 	  support it.
20 *
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * 	- Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * 	  but can emit to P_TEMP first - then MOV later. NVIDIA does this
24 *
25 * Verify half-insns work where expected - and force disable them where they
26 * don't work - MUL has it forcibly disabled atm as it fixes POW..
27 *
28 * FUCK! watch dst==src vectors, can overwrite components that are needed.
29 * 	ie. SUB R0, R0.yzxw, R0
30 *
31 * MOV dst, -src
32 * 	"delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
33 * 	mov dst, tmp
34 *
35 * Things to check with renouveau:
36 * 	FP attr/result assignment - how?
37 * 		attrib
38 * 			- 0x16bc maps vp output onto fp hpos
39 * 			- 0x16c0 maps vp output onto fp col0
40 * 		result
41 * 			- colr always 0-3
42 * 			- depr always 4
43 * 0x16bc->0x16e8 --> some binding between vp/fp regs
44 * 0x16b8 --> VP output count
45 *
46 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
47 * 	      "MOV rcol.x, fcol.y" = 0x00000004
48 * 0x19a8 --> as above but 0x00000100 and 0x00000000
49 * 	- 0x00100000 used when KIL used
50 * 0x196c --> as above but 0x00000011 and 0x00000000
51 *
52 * 0x1988 --> 0xXXNNNNNN
53 * 	- XX == FP high something
54 */
/* A single scalar register operand as tracked by the translator. */
struct nv50_reg {
	/* Register file the operand lives in. */
	enum {
		P_TEMP, P_ATTR, P_RESULT, P_CONST, P_IMMD
	} type;

	/* -1 marks an anonymous register created by alloc_temp() or
	 * alloc_immd(); free_temp() only releases registers with index -1. */
	int index;

	/* Hardware slot.  For P_TEMP a negative value means "no slot
	 * assigned yet" and alloc_reg() will pick one. */
	int hw;

	/* Not referenced by any code visible in this part of the file. */
	int neg;
};
68
69struct nv50_pc {
70	struct nv50_program *p;
71
72	/* hw resources */
73	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
74
75	/* tgsi resources */
76	struct nv50_reg *temp;
77	int temp_nr;
78	struct nv50_reg *attr;
79	int attr_nr;
80	struct nv50_reg *result;
81	int result_nr;
82	struct nv50_reg *param;
83	int param_nr;
84	struct nv50_reg *immd;
85	float *immd_buf;
86	int immd_nr;
87
88	struct nv50_reg *temp_temp[16];
89	unsigned temp_temp_nr;
90};
91
92static void
93alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
94{
95	int i;
96
97	if (reg->type != P_TEMP)
98		return;
99
100	if (reg->hw >= 0) {
101		/*XXX: do this here too to catch FP temp-as-attr usage..
102		 *     not clean, but works */
103		if (pc->p->cfg.high_temp < (reg->hw + 1))
104			pc->p->cfg.high_temp = reg->hw + 1;
105		return;
106	}
107
108	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
109		if (!(pc->r_temp[i])) {
110			pc->r_temp[i] = reg;
111			reg->hw = i;
112			if (pc->p->cfg.high_temp < (i + 1))
113				pc->p->cfg.high_temp = i + 1;
114			return;
115		}
116	}
117
118	assert(0);
119}
120
121static struct nv50_reg *
122alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
123{
124	struct nv50_reg *r;
125	int i;
126
127	if (dst && dst->type == P_TEMP && dst->hw == -1)
128		return dst;
129
130	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
131		if (!pc->r_temp[i]) {
132			r = CALLOC_STRUCT(nv50_reg);
133			r->type = P_TEMP;
134			r->index = -1;
135			r->hw = i;
136			pc->r_temp[i] = r;
137			return r;
138		}
139	}
140
141	assert(0);
142	return NULL;
143}
144
145static void
146free_temp(struct nv50_pc *pc, struct nv50_reg *r)
147{
148	if (r->index == -1) {
149		unsigned hw = r->hw;
150
151		FREE(pc->r_temp[hw]);
152		pc->r_temp[hw] = NULL;
153	}
154}
155
156static struct nv50_reg *
157temp_temp(struct nv50_pc *pc)
158{
159	if (pc->temp_temp_nr >= 16)
160		assert(0);
161
162	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
163	return pc->temp_temp[pc->temp_temp_nr++];
164}
165
166static void
167kill_temp_temp(struct nv50_pc *pc)
168{
169	int i;
170
171	for (i = 0; i < pc->temp_temp_nr; i++)
172		free_temp(pc, pc->temp_temp[i]);
173	pc->temp_temp_nr = 0;
174}
175
176static int
177ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
178{
179	pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
180					     sizeof(float));
181	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
182	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
183	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
184	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
185
186	return pc->immd_nr++;
187}
188
189static struct nv50_reg *
190alloc_immd(struct nv50_pc *pc, float f)
191{
192	struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
193	unsigned hw;
194
195	hw = ctor_immd(pc, f, 0, 0, 0) * 4;
196	r->type = P_IMMD;
197	r->hw = hw;
198	r->index = -1;
199	return r;
200}
201
202static struct nv50_program_exec *
203exec(struct nv50_pc *pc)
204{
205	struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
206
207	e->param.index = -1;
208	return e;
209}
210
211static void
212emit(struct nv50_pc *pc, struct nv50_program_exec *e)
213{
214	struct nv50_program *p = pc->p;
215
216	if (p->exec_tail)
217		p->exec_tail->next = e;
218	if (!p->exec_head)
219		p->exec_head = e;
220	p->exec_tail = e;
221	p->exec_size += (e->inst[0] & 1) ? 2 : 1;
222}
223
224static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
225
226static boolean
227is_long(struct nv50_program_exec *e)
228{
229	if (e->inst[0] & 1)
230		return TRUE;
231	return FALSE;
232}
233
234static boolean
235is_immd(struct nv50_program_exec *e)
236{
237	if (is_long(e) && (e->inst[1] & 3) == 3)
238		return TRUE;
239	return FALSE;
240}
241
242static INLINE void
243set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
244	 struct nv50_program_exec *e)
245{
246	set_long(pc, e);
247	e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
248	e->inst[1] |= (pred << 7) | (idx << 12);
249}
250
251static INLINE void
252set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
253	    struct nv50_program_exec *e)
254{
255	set_long(pc, e);
256	e->inst[1] &= ~((0x3 << 4) | (1 << 6));
257	e->inst[1] |= (idx << 4) | (on << 6);
258}
259
260static INLINE void
261set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
262{
263	if (is_long(e))
264		return;
265
266	e->inst[0] |= 1;
267	set_pred(pc, 0xf, 0, e);
268	set_pred_wr(pc, 0, 0, e);
269}
270
271static INLINE void
272set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
273{
274	if (dst->type == P_RESULT) {
275		set_long(pc, e);
276		e->inst[1] |= 0x00000008;
277	}
278
279	alloc_reg(pc, dst);
280	e->inst[0] |= (dst->hw << 2);
281}
282
283static INLINE void
284set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
285{
286	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
287
288	set_long(pc, e);
289	/*XXX: can't be predicated - bits overlap.. catch cases where both
290	 *     are required and avoid them. */
291	set_pred(pc, 0, 0, e);
292	set_pred_wr(pc, 0, 0, e);
293
294	e->inst[1] |= 0x00000002 | 0x00000001;
295	e->inst[0] |= (val & 0x3f) << 16;
296	e->inst[1] |= (val >> 6) << 2;
297}
298
299static void
300emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
301	    struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
302{
303	struct nv50_program_exec *e = exec(pc);
304
305	e->inst[0] |= 0x80000000;
306	set_dst(pc, dst, e);
307	alloc_reg(pc, iv);
308	e->inst[0] |= (iv->hw << 9);
309	alloc_reg(pc, src);
310	e->inst[0] |= (src->hw << 16);
311	if (noperspective)
312		e->inst[0] |= (1 << 25);
313
314	emit(pc, e);
315}
316
317static void
318set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
319	 struct nv50_program_exec *e)
320{
321	set_long(pc, e);
322#if 1
323	e->inst[1] |= (1 << 22);
324#else
325	if (src->type == P_IMMD) {
326		e->inst[1] |= (NV50_CB_PMISC << 22);
327	} else {
328		if (pc->p->type == PIPE_SHADER_VERTEX)
329			e->inst[1] |= (NV50_CB_PVP << 22);
330		else
331			e->inst[1] |= (NV50_CB_PFP << 22);
332	}
333#endif
334
335	e->param.index = src->hw;
336	e->param.shift = s;
337	e->param.mask = m << (s % 32);
338}
339
340static void
341emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
342{
343	struct nv50_program_exec *e = exec(pc);
344
345	e->inst[0] |= 0x10000000;
346
347	set_dst(pc, dst, e);
348
349	if (dst->type != P_RESULT && src->type == P_IMMD) {
350		set_immd(pc, src, e);
351		/*XXX: 32-bit, but steals part of "half" reg space - need to
352		 *     catch and handle this case if/when we do half-regs
353		 */
354		e->inst[0] |= 0x00008000;
355	} else
356	if (src->type == P_IMMD || src->type == P_CONST) {
357		set_long(pc, e);
358		set_data(pc, src, 0x7f, 9, e);
359		e->inst[1] |= 0x20000000; /* src0 const? */
360	} else {
361		if (src->type == P_ATTR) {
362			set_long(pc, e);
363			e->inst[1] |= 0x00200000;
364		}
365
366		alloc_reg(pc, src);
367		e->inst[0] |= (src->hw << 9);
368	}
369
370	/* We really should support "half" instructions here at some point,
371	 * but I don't feel confident enough about them yet.
372	 */
373	set_long(pc, e);
374	if (is_long(e) && !is_immd(e)) {
375		e->inst[1] |= 0x04000000; /* 32-bit */
376		e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
377	}
378
379	emit(pc, e);
380}
381
382static boolean
383check_swap_src_0_1(struct nv50_pc *pc,
384		   struct nv50_reg **s0, struct nv50_reg **s1)
385{
386	struct nv50_reg *src0 = *s0, *src1 = *s1;
387
388	if (src0->type == P_CONST) {
389		if (src1->type != P_CONST) {
390			*s0 = src1;
391			*s1 = src0;
392			return TRUE;
393		}
394	} else
395	if (src1->type == P_ATTR) {
396		if (src0->type != P_ATTR) {
397			*s0 = src1;
398			*s1 = src0;
399			return TRUE;
400		}
401	}
402
403	return FALSE;
404}
405
406static void
407set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
408{
409	if (src->type == P_ATTR) {
410		set_long(pc, e);
411		e->inst[1] |= 0x00200000;
412	} else
413	if (src->type == P_CONST || src->type == P_IMMD) {
414		struct nv50_reg *temp = temp_temp(pc);
415
416		emit_mov(pc, temp, src);
417		src = temp;
418	}
419
420	alloc_reg(pc, src);
421	e->inst[0] |= (src->hw << 9);
422}
423
424static void
425set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
426{
427	if (src->type == P_ATTR) {
428		struct nv50_reg *temp = temp_temp(pc);
429
430		emit_mov(pc, temp, src);
431		src = temp;
432	} else
433	if (src->type == P_CONST || src->type == P_IMMD) {
434		assert(!(e->inst[0] & 0x00800000));
435		if (e->inst[0] & 0x01000000) {
436			struct nv50_reg *temp = temp_temp(pc);
437
438			emit_mov(pc, temp, src);
439			src = temp;
440		} else {
441			set_data(pc, src, 0x7f, 16, e);
442			e->inst[0] |= 0x00800000;
443		}
444	}
445
446	alloc_reg(pc, src);
447	e->inst[0] |= (src->hw << 16);
448}
449
450static void
451set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
452{
453	set_long(pc, e);
454
455	if (src->type == P_ATTR) {
456		struct nv50_reg *temp = temp_temp(pc);
457
458		emit_mov(pc, temp, src);
459		src = temp;
460	} else
461	if (src->type == P_CONST || src->type == P_IMMD) {
462		assert(!(e->inst[0] & 0x01000000));
463		if (e->inst[0] & 0x00800000) {
464			struct nv50_reg *temp = temp_temp(pc);
465
466			emit_mov(pc, temp, src);
467			src = temp;
468		} else {
469			set_data(pc, src, 0x7f, 32+14, e);
470			e->inst[0] |= 0x01000000;
471		}
472	}
473
474	alloc_reg(pc, src);
475	e->inst[1] |= (src->hw << 14);
476}
477
478static void
479emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
480	 struct nv50_reg *src1)
481{
482	struct nv50_program_exec *e = exec(pc);
483
484	e->inst[0] |= 0xc0000000;
485	set_long(pc, e);
486
487	check_swap_src_0_1(pc, &src0, &src1);
488	set_dst(pc, dst, e);
489	set_src_0(pc, src0, e);
490	set_src_1(pc, src1, e);
491
492	emit(pc, e);
493}
494
495static void
496emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
497	 struct nv50_reg *src0, struct nv50_reg *src1)
498{
499	struct nv50_program_exec *e = exec(pc);
500
501	e->inst[0] |= 0xb0000000;
502
503	check_swap_src_0_1(pc, &src0, &src1);
504	set_dst(pc, dst, e);
505	set_src_0(pc, src0, e);
506	if (is_long(e))
507		set_src_2(pc, src1, e);
508	else
509		set_src_1(pc, src1, e);
510
511	emit(pc, e);
512}
513
514static void
515emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
516	    struct nv50_reg *src0, struct nv50_reg *src1)
517{
518	struct nv50_program_exec *e = exec(pc);
519
520	set_long(pc, e);
521	e->inst[0] |= 0xb0000000;
522	e->inst[1] |= (sub << 29);
523
524	check_swap_src_0_1(pc, &src0, &src1);
525	set_dst(pc, dst, e);
526	set_src_0(pc, src0, e);
527	set_src_1(pc, src1, e);
528
529	emit(pc, e);
530}
531
532static void
533emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
534	 struct nv50_reg *src1)
535{
536	struct nv50_program_exec *e = exec(pc);
537
538	e->inst[0] |= 0xb0000000;
539
540	set_long(pc, e);
541	if (check_swap_src_0_1(pc, &src0, &src1))
542		e->inst[1] |= 0x04000000;
543	else
544		e->inst[1] |= 0x08000000;
545
546	set_dst(pc, dst, e);
547	set_src_0(pc, src0, e);
548	set_src_2(pc, src1, e);
549
550	emit(pc, e);
551}
552
553static void
554emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
555	 struct nv50_reg *src1, struct nv50_reg *src2)
556{
557	struct nv50_program_exec *e = exec(pc);
558
559	e->inst[0] |= 0xe0000000;
560
561	check_swap_src_0_1(pc, &src0, &src1);
562	set_dst(pc, dst, e);
563	set_src_0(pc, src0, e);
564	set_src_1(pc, src1, e);
565	set_src_2(pc, src2, e);
566
567	emit(pc, e);
568}
569
570static void
571emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
572	 struct nv50_reg *src1, struct nv50_reg *src2)
573{
574	struct nv50_program_exec *e = exec(pc);
575
576	e->inst[0] |= 0xe0000000;
577	set_long(pc, e);
578	e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
579
580	check_swap_src_0_1(pc, &src0, &src1);
581	set_dst(pc, dst, e);
582	set_src_0(pc, src0, e);
583	set_src_1(pc, src1, e);
584	set_src_2(pc, src2, e);
585
586	emit(pc, e);
587}
588
589static void
590emit_flop(struct nv50_pc *pc, unsigned sub,
591	  struct nv50_reg *dst, struct nv50_reg *src)
592{
593	struct nv50_program_exec *e = exec(pc);
594
595	e->inst[0] |= 0x90000000;
596	if (sub) {
597		set_long(pc, e);
598		e->inst[1] |= (sub << 29);
599	}
600
601	set_dst(pc, dst, e);
602	set_src_0(pc, src, e);
603
604	emit(pc, e);
605}
606
607static void
608emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
609{
610	struct nv50_program_exec *e = exec(pc);
611
612	e->inst[0] |= 0xb0000000;
613
614	set_dst(pc, dst, e);
615	set_src_0(pc, src, e);
616	set_long(pc, e);
617	e->inst[1] |= (6 << 29) | 0x00004000;
618
619	emit(pc, e);
620}
621
622static void
623emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
624{
625	struct nv50_program_exec *e = exec(pc);
626
627	e->inst[0] |= 0xb0000000;
628
629	set_dst(pc, dst, e);
630	set_src_0(pc, src, e);
631	set_long(pc, e);
632	e->inst[1] |= (6 << 29);
633
634	emit(pc, e);
635}
636
637static void
638emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
639	 struct nv50_reg *src0, struct nv50_reg *src1)
640{
641	struct nv50_program_exec *e = exec(pc);
642	unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
643	struct nv50_reg *rdst;
644
645	assert(c_op <= 7);
646	if (check_swap_src_0_1(pc, &src0, &src1))
647		c_op = inv_cop[c_op];
648
649	rdst = dst;
650	if (dst->type != P_TEMP)
651		dst = alloc_temp(pc, NULL);
652
653	/* set.u32 */
654	set_long(pc, e);
655	e->inst[0] |= 0xb0000000;
656	e->inst[1] |= (3 << 29);
657	e->inst[1] |= (c_op << 14);
658	/*XXX: breaks things, .u32 by default?
659	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this
660	 *     doesn't seem to match what the hw actually does.
661	inst[1] |= 0x04000000; << breaks things.. .u32 by default?
662	 */
663	set_dst(pc, dst, e);
664	set_src_0(pc, src0, e);
665	set_src_1(pc, src1, e);
666	emit(pc, e);
667
668	/* cvt.f32.u32 */
669	e = exec(pc);
670	e->inst[0] = 0xa0000001;
671	e->inst[1] = 0x64014780;
672	set_dst(pc, rdst, e);
673	set_src_0(pc, dst, e);
674	emit(pc, e);
675
676	if (dst != rdst)
677		free_temp(pc, dst);
678}
679
680static void
681emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
682{
683	struct nv50_program_exec *e = exec(pc);
684
685	e->inst[0] = 0xa0000000; /* cvt */
686	set_long(pc, e);
687	e->inst[1] |= (6 << 29); /* cvt */
688	e->inst[1] |= 0x08000000; /* integer mode */
689	e->inst[1] |= 0x04000000; /* 32 bit */
690	e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
691	e->inst[1] |= (1 << 14); /* src .f32 */
692	set_dst(pc, dst, e);
693	set_src_0(pc, src, e);
694
695	emit(pc, e);
696}
697
/*
 * Emit dst = pow(v, e), computed as EX2(LG2(v) * e):
 * flop 3 (LG2), multiply by the exponent, the EX2 pre-step, then
 * flop 6 (EX2).  A private scratch temp carries the intermediate value.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc;

	acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);
	emit_mul(pc, acc, acc, e);
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);

	free_temp(pc, acc);
}
711
712static void
713emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
714{
715	struct nv50_program_exec *e = exec(pc);
716
717	e->inst[0] = 0xa0000000; /* cvt */
718	set_long(pc, e);
719	e->inst[1] |= (6 << 29); /* cvt */
720	e->inst[1] |= 0x04000000; /* 32 bit */
721	e->inst[1] |= (1 << 14); /* src .f32 */
722	e->inst[1] |= ((1 << 6) << 14); /* .abs */
723	set_dst(pc, dst, e);
724	set_src_0(pc, src, e);
725
726	emit(pc, e);
727}
728
729static void
730emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
731	 struct nv50_reg **src)
732{
733	struct nv50_reg *one = alloc_immd(pc, 1.0);
734	struct nv50_reg *zero = alloc_immd(pc, 0.0);
735	struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
736	struct nv50_reg *pos128 = alloc_immd(pc,  127.999999);
737	struct nv50_reg *tmp[4];
738
739	if (mask & (1 << 0))
740		emit_mov(pc, dst[0], one);
741
742	if (mask & (1 << 3))
743		emit_mov(pc, dst[3], one);
744
745	if (mask & (3 << 1)) {
746		if (mask & (1 << 1))
747			tmp[0] = dst[1];
748		else
749			tmp[0] = temp_temp(pc);
750		emit_minmax(pc, 4, tmp[0], src[0], zero);
751	}
752
753	if (mask & (1 << 2)) {
754		set_pred_wr(pc, 1, 0, pc->p->exec_tail);
755
756		tmp[1] = temp_temp(pc);
757		emit_minmax(pc, 4, tmp[1], src[1], zero);
758
759		tmp[3] = temp_temp(pc);
760		emit_minmax(pc, 4, tmp[3], src[3], neg128);
761		emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
762
763		emit_pow(pc, dst[2], tmp[1], tmp[3]);
764		emit_mov(pc, dst[2], zero);
765		set_pred(pc, 3, 0, pc->p->exec_tail);
766	}
767}
768
769static void
770emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
771{
772	struct nv50_program_exec *e = exec(pc);
773
774	set_long(pc, e);
775	e->inst[0] |= 0xa0000000; /* delta */
776	e->inst[1] |= (7 << 29); /* delta */
777	e->inst[1] |= 0x04000000; /* negate arg0? probably not */
778	e->inst[1] |= (1 << 14); /* src .f32 */
779	set_dst(pc, dst, e);
780	set_src_0(pc, src, e);
781
782	emit(pc, e);
783}
784
785static struct nv50_reg *
786tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
787{
788	switch (dst->DstRegister.File) {
789	case TGSI_FILE_TEMPORARY:
790		return &pc->temp[dst->DstRegister.Index * 4 + c];
791	case TGSI_FILE_OUTPUT:
792		return &pc->result[dst->DstRegister.Index * 4 + c];
793	case TGSI_FILE_NULL:
794		return NULL;
795	default:
796		break;
797	}
798
799	return NULL;
800}
801
802static struct nv50_reg *
803tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
804{
805	struct nv50_reg *r = NULL;
806	struct nv50_reg *temp;
807	unsigned c;
808
809	c = tgsi_util_get_full_src_register_extswizzle(src, chan);
810	switch (c) {
811	case TGSI_EXTSWIZZLE_X:
812	case TGSI_EXTSWIZZLE_Y:
813	case TGSI_EXTSWIZZLE_Z:
814	case TGSI_EXTSWIZZLE_W:
815		switch (src->SrcRegister.File) {
816		case TGSI_FILE_INPUT:
817			r = &pc->attr[src->SrcRegister.Index * 4 + c];
818			break;
819		case TGSI_FILE_TEMPORARY:
820			r = &pc->temp[src->SrcRegister.Index * 4 + c];
821			break;
822		case TGSI_FILE_CONSTANT:
823			r = &pc->param[src->SrcRegister.Index * 4 + c];
824			break;
825		case TGSI_FILE_IMMEDIATE:
826			r = &pc->immd[src->SrcRegister.Index * 4 + c];
827			break;
828		default:
829			assert(0);
830			break;
831		}
832		break;
833	case TGSI_EXTSWIZZLE_ZERO:
834		r = alloc_immd(pc, 0.0);
835		break;
836	case TGSI_EXTSWIZZLE_ONE:
837		r = alloc_immd(pc, 1.0);
838		break;
839	default:
840		assert(0);
841		break;
842	}
843
844	switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
845	case TGSI_UTIL_SIGN_KEEP:
846		break;
847	case TGSI_UTIL_SIGN_CLEAR:
848		temp = temp_temp(pc);
849		emit_abs(pc, temp, r);
850		r = temp;
851		break;
852	case TGSI_UTIL_SIGN_TOGGLE:
853		temp = temp_temp(pc);
854		emit_neg(pc, temp, r);
855		r = temp;
856		break;
857	case TGSI_UTIL_SIGN_SET:
858		temp = temp_temp(pc);
859		emit_abs(pc, temp, r);
860		emit_neg(pc, temp, r);
861		r = temp;
862		break;
863	default:
864		assert(0);
865		break;
866	}
867
868	return r;
869}
870
871static boolean
872nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
873{
874	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
875	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
876	unsigned mask, sat;
877	int i, c;
878
879	NOUVEAU_ERR("insn %p\n", tok);
880
881	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
882	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
883
884	for (c = 0; c < 4; c++) {
885		if (mask & (1 << c))
886			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
887		else
888			dst[c] = NULL;
889	}
890
891	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
892		for (c = 0; c < 4; c++)
893			src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
894	}
895
896	if (sat) {
897		for (c = 0; c < 4; c++) {
898			rdst[c] = dst[c];
899			dst[c] = temp_temp(pc);
900		}
901	}
902
903	switch (inst->Instruction.Opcode) {
904	case TGSI_OPCODE_ABS:
905		for (c = 0; c < 4; c++) {
906			if (!(mask & (1 << c)))
907				continue;
908			emit_abs(pc, dst[c], src[0][c]);
909		}
910		break;
911	case TGSI_OPCODE_ADD:
912		for (c = 0; c < 4; c++) {
913			if (!(mask & (1 << c)))
914				continue;
915			emit_add(pc, dst[c], src[0][c], src[1][c]);
916		}
917		break;
918	case TGSI_OPCODE_COS:
919		temp = alloc_temp(pc, NULL);
920		emit_precossin(pc, temp, src[0][0]);
921		emit_flop(pc, 5, temp, temp);
922		for (c = 0; c < 4; c++) {
923			if (!(mask & (1 << c)))
924				continue;
925			emit_mov(pc, dst[c], temp);
926		}
927		break;
928	case TGSI_OPCODE_DP3:
929		temp = alloc_temp(pc, NULL);
930		emit_mul(pc, temp, src[0][0], src[1][0]);
931		emit_mad(pc, temp, src[0][1], src[1][1], temp);
932		emit_mad(pc, temp, src[0][2], src[1][2], temp);
933		for (c = 0; c < 4; c++) {
934			if (!(mask & (1 << c)))
935				continue;
936			emit_mov(pc, dst[c], temp);
937		}
938		free_temp(pc, temp);
939		break;
940	case TGSI_OPCODE_DP4:
941		temp = alloc_temp(pc, NULL);
942		emit_mul(pc, temp, src[0][0], src[1][0]);
943		emit_mad(pc, temp, src[0][1], src[1][1], temp);
944		emit_mad(pc, temp, src[0][2], src[1][2], temp);
945		emit_mad(pc, temp, src[0][3], src[1][3], temp);
946		for (c = 0; c < 4; c++) {
947			if (!(mask & (1 << c)))
948				continue;
949			emit_mov(pc, dst[c], temp);
950		}
951		free_temp(pc, temp);
952		break;
953	case TGSI_OPCODE_DPH:
954		temp = alloc_temp(pc, NULL);
955		emit_mul(pc, temp, src[0][0], src[1][0]);
956		emit_mad(pc, temp, src[0][1], src[1][1], temp);
957		emit_mad(pc, temp, src[0][2], src[1][2], temp);
958		emit_add(pc, temp, src[1][3], temp);
959		for (c = 0; c < 4; c++) {
960			if (!(mask & (1 << c)))
961				continue;
962			emit_mov(pc, dst[c], temp);
963		}
964		free_temp(pc, temp);
965		break;
966	case TGSI_OPCODE_DST:
967	{
968		struct nv50_reg *one = alloc_immd(pc, 1.0);
969		if (mask & (1 << 0))
970			emit_mov(pc, dst[0], one);
971		if (mask & (1 << 1))
972			emit_mul(pc, dst[1], src[0][1], src[1][1]);
973		if (mask & (1 << 2))
974			emit_mov(pc, dst[2], src[0][2]);
975		if (mask & (1 << 3))
976			emit_mov(pc, dst[3], src[1][3]);
977		FREE(one);
978	}
979		break;
980	case TGSI_OPCODE_EX2:
981		temp = alloc_temp(pc, NULL);
982		emit_preex2(pc, temp, src[0][0]);
983		emit_flop(pc, 6, temp, temp);
984		for (c = 0; c < 4; c++) {
985			if (!(mask & (1 << c)))
986				continue;
987			emit_mov(pc, dst[c], temp);
988		}
989		free_temp(pc, temp);
990		break;
991	case TGSI_OPCODE_FLR:
992		for (c = 0; c < 4; c++) {
993			if (!(mask & (1 << c)))
994				continue;
995			emit_flr(pc, dst[c], src[0][c]);
996		}
997		break;
998	case TGSI_OPCODE_FRC:
999		temp = alloc_temp(pc, NULL);
1000		for (c = 0; c < 4; c++) {
1001			if (!(mask & (1 << c)))
1002				continue;
1003			emit_flr(pc, temp, src[0][c]);
1004			emit_sub(pc, dst[c], src[0][c], temp);
1005		}
1006		free_temp(pc, temp);
1007		break;
1008	case TGSI_OPCODE_LIT:
1009		emit_lit(pc, &dst[0], mask, &src[0][0]);
1010		break;
1011	case TGSI_OPCODE_LG2:
1012		temp = alloc_temp(pc, NULL);
1013		emit_flop(pc, 3, temp, src[0][0]);
1014		for (c = 0; c < 4; c++) {
1015			if (!(mask & (1 << c)))
1016				continue;
1017			emit_mov(pc, dst[c], temp);
1018		}
1019		break;
1020	case TGSI_OPCODE_LRP:
1021		for (c = 0; c < 4; c++) {
1022			if (!(mask & (1 << c)))
1023				continue;
1024			/*XXX: we can do better than this */
1025			temp = alloc_temp(pc, NULL);
1026			emit_neg(pc, temp, src[0][c]);
1027			emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1028			emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1029			free_temp(pc, temp);
1030		}
1031		break;
1032	case TGSI_OPCODE_MAD:
1033		for (c = 0; c < 4; c++) {
1034			if (!(mask & (1 << c)))
1035				continue;
1036			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1037		}
1038		break;
1039	case TGSI_OPCODE_MAX:
1040		for (c = 0; c < 4; c++) {
1041			if (!(mask & (1 << c)))
1042				continue;
1043			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1044		}
1045		break;
1046	case TGSI_OPCODE_MIN:
1047		for (c = 0; c < 4; c++) {
1048			if (!(mask & (1 << c)))
1049				continue;
1050			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1051		}
1052		break;
1053	case TGSI_OPCODE_MOV:
1054		for (c = 0; c < 4; c++) {
1055			if (!(mask & (1 << c)))
1056				continue;
1057			emit_mov(pc, dst[c], src[0][c]);
1058		}
1059		break;
1060	case TGSI_OPCODE_MUL:
1061		for (c = 0; c < 4; c++) {
1062			if (!(mask & (1 << c)))
1063				continue;
1064			emit_mul(pc, dst[c], src[0][c], src[1][c]);
1065		}
1066		break;
1067	case TGSI_OPCODE_POW:
1068		temp = alloc_temp(pc, NULL);
1069		emit_pow(pc, temp, src[0][0], src[1][0]);
1070		for (c = 0; c < 4; c++) {
1071			if (!(mask & (1 << c)))
1072				continue;
1073			emit_mov(pc, dst[c], temp);
1074		}
1075		free_temp(pc, temp);
1076		break;
1077	case TGSI_OPCODE_RCP:
1078		for (c = 0; c < 4; c++) {
1079			if (!(mask & (1 << c)))
1080				continue;
1081			emit_flop(pc, 0, dst[c], src[0][0]);
1082		}
1083		break;
1084	case TGSI_OPCODE_RSQ:
1085		for (c = 0; c < 4; c++) {
1086			if (!(mask & (1 << c)))
1087				continue;
1088			emit_flop(pc, 2, dst[c], src[0][0]);
1089		}
1090		break;
1091	case TGSI_OPCODE_SCS:
1092		temp = alloc_temp(pc, NULL);
1093		emit_precossin(pc, temp, src[0][0]);
1094		if (mask & (1 << 0))
1095			emit_flop(pc, 5, dst[0], temp);
1096		if (mask & (1 << 1))
1097			emit_flop(pc, 4, dst[1], temp);
1098		break;
1099	case TGSI_OPCODE_SGE:
1100		for (c = 0; c < 4; c++) {
1101			if (!(mask & (1 << c)))
1102				continue;
1103			emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1104		}
1105		break;
1106	case TGSI_OPCODE_SIN:
1107		temp = alloc_temp(pc, NULL);
1108		emit_precossin(pc, temp, src[0][0]);
1109		emit_flop(pc, 4, temp, temp);
1110		for (c = 0; c < 4; c++) {
1111			if (!(mask & (1 << c)))
1112				continue;
1113			emit_mov(pc, dst[c], temp);
1114		}
1115		break;
1116	case TGSI_OPCODE_SLT:
1117		for (c = 0; c < 4; c++) {
1118			if (!(mask & (1 << c)))
1119				continue;
1120			emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1121		}
1122		break;
1123	case TGSI_OPCODE_SUB:
1124		for (c = 0; c < 4; c++) {
1125			if (!(mask & (1 << c)))
1126				continue;
1127			emit_sub(pc, dst[c], src[0][c], src[1][c]);
1128		}
1129		break;
1130	case TGSI_OPCODE_XPD:
1131		temp = alloc_temp(pc, NULL);
1132		if (mask & (1 << 0)) {
1133			emit_mul(pc, temp, src[0][2], src[1][1]);
1134			emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1135		}
1136		if (mask & (1 << 1)) {
1137			emit_mul(pc, temp, src[0][0], src[1][2]);
1138			emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1139		}
1140		if (mask & (1 << 2)) {
1141			emit_mul(pc, temp, src[0][1], src[1][0]);
1142			emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1143		}
1144		free_temp(pc, temp);
1145		break;
1146	case TGSI_OPCODE_END:
1147		break;
1148	default:
1149		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1150		return FALSE;
1151	}
1152
1153	if (sat) {
1154		for (c = 0; c < 4; c++) {
1155			struct nv50_program_exec *e;
1156
1157			if (!(mask & (1 << c)))
1158				continue;
1159			e = exec(pc);
1160
1161			e->inst[0] = 0xa0000000; /* cvt */
1162			set_long(pc, e);
1163			e->inst[1] |= (6 << 29); /* cvt */
1164			e->inst[1] |= 0x04000000; /* 32 bit */
1165			e->inst[1] |= (1 << 14); /* src .f32 */
1166			e->inst[1] |= ((1 << 5) << 14); /* .sat */
1167			set_dst(pc, rdst[c], e);
1168			set_src_0(pc, dst[c], e);
1169			emit(pc, e);
1170		}
1171	}
1172
1173	kill_temp_temp(pc);
1174	return TRUE;
1175}
1176
1177static boolean
1178nv50_program_tx_prep(struct nv50_pc *pc)
1179{
1180	struct tgsi_parse_context p;
1181	boolean ret = FALSE;
1182	unsigned i, c;
1183
1184	tgsi_parse_init(&p, pc->p->pipe.tokens);
1185	while (!tgsi_parse_end_of_tokens(&p)) {
1186		const union tgsi_full_token *tok = &p.FullToken;
1187
1188		tgsi_parse_token(&p);
1189		switch (tok->Token.Type) {
1190		case TGSI_TOKEN_TYPE_IMMEDIATE:
1191		{
1192			const struct tgsi_full_immediate *imm =
1193				&p.FullToken.FullImmediate;
1194
1195			ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1196				      imm->u.ImmediateFloat32[1].Float,
1197				      imm->u.ImmediateFloat32[2].Float,
1198				      imm->u.ImmediateFloat32[3].Float);
1199		}
1200			break;
1201		case TGSI_TOKEN_TYPE_DECLARATION:
1202		{
1203			const struct tgsi_full_declaration *d;
1204			unsigned last;
1205
1206			d = &p.FullToken.FullDeclaration;
1207			last = d->u.DeclarationRange.Last;
1208
1209			switch (d->Declaration.File) {
1210			case TGSI_FILE_TEMPORARY:
1211				if (pc->temp_nr < (last + 1))
1212					pc->temp_nr = last + 1;
1213				break;
1214			case TGSI_FILE_OUTPUT:
1215				if (pc->result_nr < (last + 1))
1216					pc->result_nr = last + 1;
1217				break;
1218			case TGSI_FILE_INPUT:
1219				if (pc->attr_nr < (last + 1))
1220					pc->attr_nr = last + 1;
1221				break;
1222			case TGSI_FILE_CONSTANT:
1223				if (pc->param_nr < (last + 1))
1224					pc->param_nr = last + 1;
1225				break;
1226			default:
1227				NOUVEAU_ERR("bad decl file %d\n",
1228					    d->Declaration.File);
1229				goto out_err;
1230			}
1231		}
1232			break;
1233		case TGSI_TOKEN_TYPE_INSTRUCTION:
1234			break;
1235		default:
1236			break;
1237		}
1238	}
1239
1240	NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1241	if (pc->temp_nr) {
1242		pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1243		if (!pc->temp)
1244			goto out_err;
1245
1246		for (i = 0; i < pc->temp_nr; i++) {
1247			for (c = 0; c < 4; c++) {
1248				pc->temp[i*4+c].type = P_TEMP;
1249				pc->temp[i*4+c].hw = -1;
1250				pc->temp[i*4+c].index = i;
1251			}
1252		}
1253	}
1254
1255	NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1256	if (pc->attr_nr) {
1257		struct nv50_reg *iv = NULL, *tmp = NULL;
1258		int aid = 0;
1259
1260		pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1261		if (!pc->attr)
1262			goto out_err;
1263
1264		if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1265			iv = alloc_temp(pc, NULL);
1266			aid++;
1267		}
1268
1269		for (i = 0; i < pc->attr_nr; i++) {
1270			struct nv50_reg *a = &pc->attr[i*4];
1271
1272			for (c = 0; c < 4; c++) {
1273				if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1274					struct nv50_reg *at =
1275						alloc_temp(pc, NULL);
1276					pc->attr[i*4+c].type = at->type;
1277					pc->attr[i*4+c].hw = at->hw;
1278					pc->attr[i*4+c].index = at->index;
1279				} else {
1280					pc->p->cfg.vp.attr[aid/32] |=
1281						(1 << (aid % 32));
1282					pc->attr[i*4+c].type = P_ATTR;
1283					pc->attr[i*4+c].hw = aid++;
1284					pc->attr[i*4+c].index = i;
1285				}
1286			}
1287
1288			if (pc->p->type != PIPE_SHADER_FRAGMENT)
1289				continue;
1290
1291			emit_interp(pc, iv, iv, iv, FALSE);
1292			tmp = alloc_temp(pc, NULL);
1293			emit_flop(pc, 0, tmp, iv);
1294			emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1295			emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1296			emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1297			emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1298			free_temp(pc, tmp);
1299		}
1300
1301		if (iv)
1302			free_temp(pc, iv);
1303	}
1304
1305	NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1306	if (pc->result_nr) {
1307		int rid = 0;
1308
1309		pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1310		if (!pc->result)
1311			goto out_err;
1312
1313		for (i = 0; i < pc->result_nr; i++) {
1314			for (c = 0; c < 4; c++) {
1315				if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1316					pc->result[i*4+c].type = P_TEMP;
1317					pc->result[i*4+c].hw = -1;
1318				} else {
1319					pc->result[i*4+c].type = P_RESULT;
1320					pc->result[i*4+c].hw = rid++;
1321				}
1322				pc->result[i*4+c].index = i;
1323			}
1324		}
1325	}
1326
1327	NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1328	if (pc->param_nr) {
1329		int rid = 0;
1330
1331		pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1332		if (!pc->param)
1333			goto out_err;
1334
1335		for (i = 0; i < pc->param_nr; i++) {
1336			for (c = 0; c < 4; c++) {
1337				pc->param[i*4+c].type = P_CONST;
1338				pc->param[i*4+c].hw = rid++;
1339				pc->param[i*4+c].index = i;
1340			}
1341		}
1342	}
1343
1344	if (pc->immd_nr) {
1345		int rid = pc->param_nr * 4;
1346
1347		pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1348		if (!pc->immd)
1349			goto out_err;
1350
1351		for (i = 0; i < pc->immd_nr; i++) {
1352			for (c = 0; c < 4; c++) {
1353				pc->immd[i*4+c].type = P_IMMD;
1354				pc->immd[i*4+c].hw = rid++;
1355				pc->immd[i*4+c].index = i;
1356			}
1357		}
1358	}
1359
1360	ret = TRUE;
1361out_err:
1362	tgsi_parse_free(&p);
1363	return ret;
1364}
1365
1366static boolean
1367nv50_program_tx(struct nv50_program *p)
1368{
1369	struct tgsi_parse_context parse;
1370	struct nv50_pc *pc;
1371	boolean ret;
1372
1373	pc = CALLOC_STRUCT(nv50_pc);
1374	if (!pc)
1375		return FALSE;
1376	pc->p = p;
1377	pc->p->cfg.high_temp = 4;
1378
1379	ret = nv50_program_tx_prep(pc);
1380	if (ret == FALSE)
1381		goto out_cleanup;
1382
1383	tgsi_parse_init(&parse, pc->p->pipe.tokens);
1384	while (!tgsi_parse_end_of_tokens(&parse)) {
1385		const union tgsi_full_token *tok = &parse.FullToken;
1386
1387		tgsi_parse_token(&parse);
1388
1389		switch (tok->Token.Type) {
1390		case TGSI_TOKEN_TYPE_INSTRUCTION:
1391			ret = nv50_program_tx_insn(pc, tok);
1392			if (ret == FALSE)
1393				goto out_err;
1394			break;
1395		default:
1396			break;
1397		}
1398	}
1399
1400	if (p->type == PIPE_SHADER_FRAGMENT) {
1401		struct nv50_reg out;
1402
1403		out.type = P_TEMP;
1404		for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1405			emit_mov(pc, &out, &pc->result[out.hw]);
1406	}
1407
1408	assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1409	pc->p->exec_tail->inst[1] |= 0x00000001;
1410
1411	p->param_nr = pc->param_nr * 4;
1412	p->immd_nr = pc->immd_nr * 4;
1413	p->immd = pc->immd_buf;
1414
1415out_err:
1416	tgsi_parse_free(&parse);
1417
1418out_cleanup:
1419	return ret;
1420}
1421
1422static void
1423nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1424{
1425	if (nv50_program_tx(p) == FALSE)
1426		assert(0);
1427	p->translated = TRUE;
1428}
1429
1430static void
1431nv50_program_upload_data(struct nv50_context *nv50, float *map,
1432			 unsigned start, unsigned count)
1433{
1434	while (count) {
1435		unsigned nr = count > 2047 ? 2047 : count;
1436
1437		BEGIN_RING(tesla, 0x00000f00, 1);
1438		OUT_RING  ((NV50_CB_PMISC << 0) | (start << 8));
1439		BEGIN_RING(tesla, 0x40000f04, nr);
1440		OUT_RINGp (map, nr);
1441
1442		map += nr;
1443		start += nr;
1444		count -= nr;
1445	}
1446}
1447
1448static void
1449nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1450{
1451	struct nouveau_winsys *nvws = nv50->screen->nvws;
1452	struct pipe_winsys *ws = nv50->pipe.winsys;
1453	unsigned nr = p->param_nr + p->immd_nr;
1454
1455	if (!p->data && nr) {
1456		struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1457
1458		if (nvws->res_alloc(heap, nr, p, &p->data)) {
1459			while (heap->next && heap->size < nr) {
1460				struct nv50_program *evict = heap->next->priv;
1461				nvws->res_free(&evict->data);
1462			}
1463
1464			if (nvws->res_alloc(heap, nr, p, &p->data))
1465				assert(0);
1466		}
1467	}
1468
1469	if (p->param_nr) {
1470		float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1471					    PIPE_BUFFER_USAGE_CPU_READ);
1472		nv50_program_upload_data(nv50, map, p->data->start,
1473					 p->param_nr);
1474		ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1475	}
1476
1477	if (p->immd_nr) {
1478		nv50_program_upload_data(nv50, p->immd,
1479					 p->data->start + p->param_nr,
1480					 p->immd_nr);
1481	}
1482}
1483
1484static void
1485nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1486{
1487	struct pipe_winsys *ws = nv50->pipe.winsys;
1488	struct nv50_program_exec *e;
1489	boolean upload = FALSE;
1490	unsigned *map;
1491
1492	if (!p->buffer) {
1493		p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
1494		upload = TRUE;
1495	}
1496
1497	if (p->data && p->data->start != p->data_start) {
1498		for (e = p->exec_head; e; e = e->next) {
1499			unsigned ei, ci;
1500
1501			if (e->param.index < 0)
1502				continue;
1503			ei = e->param.shift >> 5;
1504			ci = e->param.index + p->data->start;
1505
1506			e->inst[ei] &= ~e->param.mask;
1507			e->inst[ei] |= (ci << e->param.shift);
1508		}
1509
1510		p->data_start = p->data->start;
1511		upload = TRUE;
1512	}
1513
1514	if (!upload)
1515		return FALSE;
1516
1517	map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1518	for (e = p->exec_head; e; e = e->next) {
1519#ifdef NV50_PROGRAM_DUMP
1520		NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1521#endif
1522		*(map++) = e->inst[0];
1523		if (is_long(e)) {
1524#ifdef NV50_PROGRAM_DUMP
1525			NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1526#endif
1527			*(map++) = e->inst[1];
1528		}
1529	}
1530	ws->buffer_unmap(ws, p->buffer);
1531}
1532
1533void
1534nv50_vertprog_validate(struct nv50_context *nv50)
1535{
1536	struct nouveau_grobj *tesla = nv50->screen->tesla;
1537	struct nv50_program *p = nv50->vertprog;
1538	struct nouveau_stateobj *so;
1539
1540	if (!p->translated) {
1541		nv50_program_validate(nv50, p);
1542		if (!p->translated)
1543			assert(0);
1544	}
1545
1546	nv50_program_validate_data(nv50, p);
1547	nv50_program_validate_code(nv50, p);
1548
1549	so = so_new(11, 2);
1550	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1551	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1552		  NOUVEAU_BO_HIGH, 0, 0);
1553	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1554		  NOUVEAU_BO_LOW, 0, 0);
1555	so_method(so, tesla, 0x1650, 2);
1556	so_data  (so, p->cfg.vp.attr[0]);
1557	so_data  (so, p->cfg.vp.attr[1]);
1558	so_method(so, tesla, 0x16ac, 2);
1559	so_data  (so, 8);
1560	so_data  (so, p->cfg.high_temp);
1561	so_method(so, tesla, 0x140c, 1);
1562	so_data  (so, 0); /* program start offset */
1563	so_emit(nv50->screen->nvws, so);
1564	so_ref(NULL, &so);
1565}
1566
1567void
1568nv50_fragprog_validate(struct nv50_context *nv50)
1569{
1570	struct nouveau_grobj *tesla = nv50->screen->tesla;
1571	struct nv50_program *p = nv50->fragprog;
1572	struct nouveau_stateobj *so;
1573
1574	if (!p->translated) {
1575		nv50_program_validate(nv50, p);
1576		if (!p->translated)
1577			assert(0);
1578	}
1579
1580	nv50_program_validate_data(nv50, p);
1581	nv50_program_validate_code(nv50, p);
1582
1583	so = so_new(64, 2);
1584	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1585	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1586		  NOUVEAU_BO_HIGH, 0, 0);
1587	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1588		  NOUVEAU_BO_LOW, 0, 0);
1589	so_method(so, tesla, 0x1904, 4);
1590	so_data  (so, 0x01040404); /* p: 0x01000404 */
1591	so_data  (so, 0x00000004);
1592	so_data  (so, 0x00000000);
1593	so_data  (so, 0x00000000);
1594	so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */
1595	so_data  (so, 0x03020100);
1596	so_data  (so, 0x07060504);
1597	so_method(so, tesla, 0x1988, 2);
1598	so_data  (so, 0x08040404); /* p: 0x0f000401 */
1599	so_data  (so, p->cfg.high_temp);
1600	so_method(so, tesla, 0x1414, 1);
1601	so_data  (so, 0); /* program start offset */
1602	so_emit(nv50->screen->nvws, so);
1603	so_ref(NULL, &so);
1604}
1605
1606void
1607nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1608{
1609	struct pipe_winsys *ws = nv50->pipe.winsys;
1610
1611	while (p->exec_head) {
1612		struct nv50_program_exec *e = p->exec_head;
1613
1614		p->exec_head = e->next;
1615		FREE(e);
1616	}
1617	p->exec_tail = NULL;
1618	p->exec_size = 0;
1619
1620	if (p->buffer)
1621		pipe_buffer_reference(ws, &p->buffer, NULL);
1622
1623	p->translated = 0;
1624}
1625
1626