nv30_fragprog.c revision a55e50b082ca068d35d695ff323603507e2b64aa
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/tgsi_dump.h"
8#include "tgsi/tgsi_parse.h"
9#include "tgsi/tgsi_util.h"
10
11#include "nv30_context.h"
12
/* Component selectors used when building source swizzles. */
#define SWZ_X 0
#define SWZ_Y 1
#define SWZ_Z 2
#define SWZ_W 3

/* Destination write-mask bits: bit N enables component N. */
#define MASK_X (1 << SWZ_X)
#define MASK_Y (1 << SWZ_Y)
#define MASK_Z (1 << SWZ_Z)
#define MASK_W (1 << SWZ_W)
#define MASK_ALL (MASK_X | MASK_Y | MASK_Z | MASK_W)

/* Defaults set up ahead of the nv30_shader.h include that follows;
 * presumably consumed by that header -- confirm there.
 */
#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
#define DEF_CTEST NV30_FP_OP_COND_TR
24#include "nv30_shader.h"
25
/* Shorthand wrappers around the nv30_sr_* source-register modifiers. */
#define swz(reg, cx, cy, cz, cw) \
	nv30_sr_swz((reg), SWZ_##cx, SWZ_##cy, SWZ_##cz, SWZ_##cw)
#define neg(reg) nv30_sr_neg((reg))
#define abs(reg) nv30_sr_abs((reg))
#define scale(reg, amount) nv30_sr_scale((reg), NV30_FP_OP_DST_SCALE_##amount)

/* Capacities of the per-translation constant and immediate tables. */
#define MAX_CONSTS 128
#define MAX_IMM 32
33struct nv30_fpc {
34	struct nv30_fragment_program *fp;
35
36	uint attrib_map[PIPE_MAX_SHADER_INPUTS];
37
38	int high_temp;
39	int temp_temp_count;
40	int num_regs;
41
42	uint depth_id;
43	uint colour_id;
44
45	unsigned inst_offset;
46
47	struct {
48		int pipe;
49		float vals[4];
50	} consts[MAX_CONSTS];
51	int nr_consts;
52
53	struct nv30_sreg imm[MAX_IMM];
54	unsigned nr_imm;
55};
56
57static INLINE struct nv30_sreg
58temp(struct nv30_fpc *fpc)
59{
60	int idx;
61
62	idx  = fpc->temp_temp_count++;
63	idx += fpc->high_temp + 1;
64	return nv30_sr(NV30SR_TEMP, idx);
65}
66
67static INLINE struct nv30_sreg
68constant(struct nv30_fpc *fpc, int pipe, float vals[4])
69{
70	int idx;
71
72	if (fpc->nr_consts == MAX_CONSTS)
73		assert(0);
74	idx = fpc->nr_consts++;
75
76	fpc->consts[idx].pipe = pipe;
77	if (pipe == -1)
78		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
79	return nv30_sr(NV30SR_CONST, idx);
80}
81
/* Emit an arithmetic instruction; o is the opcode name without the
 * NV30_FP_OP_OPCODE_ prefix, cc is the struct nv30_fpc pointer.
 */
#define arith(cc,s,o,d,m,s0,s1,s2) \
	nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
			(d), (m), (s0), (s1), (s2))
/* Emit a texture instruction on unit u.  s1 and s2 are forwarded as
 * given rather than replaced by whatever `none` is in the caller's scope.
 */
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
	nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
		    (d), (m), (s0), (s1), (s2))
88
89static void
90grow_insns(struct nv30_fpc *fpc, int size)
91{
92	struct nv30_fragment_program *fp = fpc->fp;
93
94	fp->insn_len += size;
95	fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
96}
97
98static void
99emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
100{
101	struct nv30_fragment_program *fp = fpc->fp;
102	uint32_t *hw = &fp->insn[fpc->inst_offset];
103	uint32_t sr = 0;
104
105	switch (src.type) {
106	case NV30SR_INPUT:
107		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
108		hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
109		break;
110	case NV30SR_OUTPUT:
111		sr |= NV30_FP_REG_SRC_HALF;
112		/* fall-through */
113	case NV30SR_TEMP:
114		sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
115		sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
116		break;
117	case NV30SR_CONST:
118		grow_insns(fpc, 4);
119		hw = &fp->insn[fpc->inst_offset];
120		if (fpc->consts[src.index].pipe >= 0) {
121			struct nv30_fragment_program_data *fpd;
122
123			fp->consts = realloc(fp->consts, ++fp->nr_consts *
124					     sizeof(*fpd));
125			fpd = &fp->consts[fp->nr_consts - 1];
126			fpd->offset = fpc->inst_offset + 4;
127			fpd->index = fpc->consts[src.index].pipe;
128			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
129		} else {
130			memcpy(&fp->insn[fpc->inst_offset + 4],
131				fpc->consts[src.index].vals,
132				sizeof(uint32_t) * 4);
133		}
134
135		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
136		break;
137	case NV30SR_NONE:
138		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
139		break;
140	default:
141		assert(0);
142	}
143
144	if (src.negate)
145		sr |= NV30_FP_REG_NEGATE;
146
147	if (src.abs)
148		hw[1] |= (1 << (29 + pos));
149
150	sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
151	       (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
152	       (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
153	       (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
154
155	hw[pos + 1] |= sr;
156}
157
158static void
159emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
160{
161	struct nv30_fragment_program *fp = fpc->fp;
162	uint32_t *hw = &fp->insn[fpc->inst_offset];
163
164	switch (dst.type) {
165	case NV30SR_TEMP:
166		if (fpc->num_regs < (dst.index + 1))
167			fpc->num_regs = dst.index + 1;
168		break;
169	case NV30SR_OUTPUT:
170		if (dst.index == 1) {
171			fp->fp_control |= 0xe;
172		} else {
173			hw[0] |= NV30_FP_OP_OUT_REG_HALF;
174		}
175		break;
176	case NV30SR_NONE:
177		hw[0] |= (1 << 30);
178		break;
179	default:
180		assert(0);
181	}
182
183	hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
184}
185
186static void
187nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
188	      struct nv30_sreg dst, int mask,
189	      struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
190{
191	struct nv30_fragment_program *fp = fpc->fp;
192	uint32_t *hw;
193
194	fpc->inst_offset = fp->insn_len;
195	grow_insns(fpc, 4);
196	hw = &fp->insn[fpc->inst_offset];
197	memset(hw, 0, sizeof(uint32_t) * 4);
198
199	if (op == NV30_FP_OP_OPCODE_KIL)
200		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
201	hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
202	hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
203	hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
204
205	if (sat)
206		hw[0] |= NV30_FP_OP_OUT_SAT;
207
208	if (dst.cc_update)
209		hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
210	hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
211	hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
212		  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
213		  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
214		  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
215
216	emit_dst(fpc, dst);
217	emit_src(fpc, 0, s0);
218	emit_src(fpc, 1, s1);
219	emit_src(fpc, 2, s2);
220}
221
222static void
223nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
224	    struct nv30_sreg dst, int mask,
225	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
226{
227	struct nv30_fragment_program *fp = fpc->fp;
228
229	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
230
231	fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
232	fp->samplers |= (1 << unit);
233}
234
235static INLINE struct nv30_sreg
236tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
237{
238	struct nv30_sreg src;
239
240	switch (fsrc->Register.File) {
241	case TGSI_FILE_INPUT:
242		src = nv30_sr(NV30SR_INPUT,
243			      fpc->attrib_map[fsrc->Register.Index]);
244		break;
245	case TGSI_FILE_CONSTANT:
246		src = constant(fpc, fsrc->Register.Index, NULL);
247		break;
248	case TGSI_FILE_IMMEDIATE:
249		assert(fsrc->Register.Index < fpc->nr_imm);
250		src = fpc->imm[fsrc->Register.Index];
251		break;
252	case TGSI_FILE_TEMPORARY:
253		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
254		if (fpc->high_temp < src.index)
255			fpc->high_temp = src.index;
256		break;
257	/* This is clearly insane, but gallium hands us shaders like this.
258	 * Luckily fragprog results are just temp regs..
259	 */
260	case TGSI_FILE_OUTPUT:
261		if (fsrc->Register.Index == fpc->colour_id)
262			return nv30_sr(NV30SR_OUTPUT, 0);
263		else
264			return nv30_sr(NV30SR_OUTPUT, 1);
265		break;
266	default:
267		NOUVEAU_ERR("bad src file\n");
268		break;
269	}
270
271	src.abs = fsrc->Register.Absolute;
272	src.negate = fsrc->Register.Negate;
273	src.swz[0] = fsrc->Register.SwizzleX;
274	src.swz[1] = fsrc->Register.SwizzleY;
275	src.swz[2] = fsrc->Register.SwizzleZ;
276	src.swz[3] = fsrc->Register.SwizzleW;
277	return src;
278}
279
280static INLINE struct nv30_sreg
281tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
282	int idx;
283
284	switch (fdst->Register.File) {
285	case TGSI_FILE_OUTPUT:
286		if (fdst->Register.Index == fpc->colour_id)
287			return nv30_sr(NV30SR_OUTPUT, 0);
288		else
289			return nv30_sr(NV30SR_OUTPUT, 1);
290		break;
291	case TGSI_FILE_TEMPORARY:
292		idx = fdst->Register.Index + 1;
293		if (fpc->high_temp < idx)
294			fpc->high_temp = idx;
295		return nv30_sr(NV30SR_TEMP, idx);
296	case TGSI_FILE_NULL:
297		return nv30_sr(NV30SR_NONE, 0);
298	default:
299		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
300		return nv30_sr(NV30SR_NONE, 0);
301	}
302}
303
304static INLINE int
305tgsi_mask(uint tgsi)
306{
307	int mask = 0;
308
309	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
310	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
311	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
312	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
313	return mask;
314}
315
316static boolean
317src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
318	       struct nv30_sreg *src)
319{
320	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
321	struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
322	uint mask = 0;
323	uint c;
324
325	for (c = 0; c < 4; c++) {
326		switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
327		case TGSI_SWIZZLE_X:
328		case TGSI_SWIZZLE_Y:
329		case TGSI_SWIZZLE_Z:
330		case TGSI_SWIZZLE_W:
331			mask |= (1 << c);
332			break;
333		default:
334			assert(0);
335		}
336	}
337
338	if (mask == MASK_ALL)
339		return TRUE;
340
341	*src = temp(fpc);
342
343	if (mask)
344		arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
345
346	return FALSE;
347}
348
349static boolean
350nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
351				const struct tgsi_full_instruction *finst)
352{
353	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
354	struct nv30_sreg src[3], dst, tmp;
355	int mask, sat, unit = 0;
356	int ai = -1, ci = -1;
357	int i;
358
359	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
360		return TRUE;
361
362	fpc->temp_temp_count = 0;
363	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
364		const struct tgsi_full_src_register *fsrc;
365
366		fsrc = &finst->Src[i];
367		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
368			src[i] = tgsi_src(fpc, fsrc);
369		}
370	}
371
372	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
373		const struct tgsi_full_src_register *fsrc;
374
375		fsrc = &finst->Src[i];
376
377		switch (fsrc->Register.File) {
378		case TGSI_FILE_INPUT:
379		case TGSI_FILE_CONSTANT:
380		case TGSI_FILE_TEMPORARY:
381			if (!src_native_swz(fpc, fsrc, &src[i]))
382				continue;
383			break;
384		default:
385			break;
386		}
387
388		switch (fsrc->Register.File) {
389		case TGSI_FILE_INPUT:
390			if (ai == -1 || ai == fsrc->Register.Index) {
391				ai = fsrc->Register.Index;
392				src[i] = tgsi_src(fpc, fsrc);
393			} else {
394				NOUVEAU_MSG("extra src attr %d\n",
395					 fsrc->Register.Index);
396				src[i] = temp(fpc);
397				arith(fpc, 0, MOV, src[i], MASK_ALL,
398				      tgsi_src(fpc, fsrc), none, none);
399			}
400			break;
401		case TGSI_FILE_CONSTANT:
402		case TGSI_FILE_IMMEDIATE:
403			if (ci == -1 || ci == fsrc->Register.Index) {
404				ci = fsrc->Register.Index;
405				src[i] = tgsi_src(fpc, fsrc);
406			} else {
407				src[i] = temp(fpc);
408				arith(fpc, 0, MOV, src[i], MASK_ALL,
409				      tgsi_src(fpc, fsrc), none, none);
410			}
411			break;
412		case TGSI_FILE_TEMPORARY:
413			/* handled above */
414			break;
415		case TGSI_FILE_SAMPLER:
416			unit = fsrc->Register.Index;
417			break;
418		case TGSI_FILE_OUTPUT:
419			break;
420		default:
421			NOUVEAU_ERR("bad src file\n");
422			return FALSE;
423		}
424	}
425
426	dst  = tgsi_dst(fpc, &finst->Dst[0]);
427	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
428	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
429
430	switch (finst->Instruction.Opcode) {
431	case TGSI_OPCODE_ABS:
432		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
433		break;
434	case TGSI_OPCODE_ADD:
435		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
436		break;
437	case TGSI_OPCODE_CMP:
438		tmp = nv30_sr(NV30SR_NONE, 0);
439		tmp.cc_update = 1;
440		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
441		dst.cc_test = NV30_VP_INST_COND_GE;
442		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
443		dst.cc_test = NV30_VP_INST_COND_LT;
444		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
445		break;
446	case TGSI_OPCODE_COS:
447		arith(fpc, sat, COS, dst, mask, src[0], none, none);
448		break;
449	case TGSI_OPCODE_DP3:
450		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
451		break;
452	case TGSI_OPCODE_DP4:
453		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
454		break;
455	case TGSI_OPCODE_DPH:
456		tmp = temp(fpc);
457		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
458		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
459		      swz(src[1], W, W, W, W), none);
460		break;
461	case TGSI_OPCODE_DST:
462		arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
463		break;
464	case TGSI_OPCODE_EX2:
465		arith(fpc, sat, EX2, dst, mask, src[0], none, none);
466		break;
467	case TGSI_OPCODE_FLR:
468		arith(fpc, sat, FLR, dst, mask, src[0], none, none);
469		break;
470	case TGSI_OPCODE_FRC:
471		arith(fpc, sat, FRC, dst, mask, src[0], none, none);
472		break;
473	case TGSI_OPCODE_KILP:
474		arith(fpc, 0, KIL, none, 0, none, none, none);
475		break;
476	case TGSI_OPCODE_KIL:
477		dst = nv30_sr(NV30SR_NONE, 0);
478		dst.cc_update = 1;
479		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
480		dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
481		arith(fpc, 0, KIL, dst, 0, none, none, none);
482		break;
483	case TGSI_OPCODE_LG2:
484		arith(fpc, sat, LG2, dst, mask, src[0], none, none);
485		break;
486//	case TGSI_OPCODE_LIT:
487	case TGSI_OPCODE_LRP:
488		arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
489		break;
490	case TGSI_OPCODE_MAD:
491		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
492		break;
493	case TGSI_OPCODE_MAX:
494		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
495		break;
496	case TGSI_OPCODE_MIN:
497		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
498		break;
499	case TGSI_OPCODE_MOV:
500		arith(fpc, sat, MOV, dst, mask, src[0], none, none);
501		break;
502	case TGSI_OPCODE_MUL:
503		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
504		break;
505	case TGSI_OPCODE_POW:
506		arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
507		break;
508	case TGSI_OPCODE_RCP:
509		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
510		break;
511	case TGSI_OPCODE_RET:
512		assert(0);
513		break;
514	case TGSI_OPCODE_RFL:
515		arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
516		break;
517	case TGSI_OPCODE_RSQ:
518		arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
519		break;
520	case TGSI_OPCODE_SCS:
521		/* avoid overwriting the source */
522		if(src[0].swz[SWZ_X] != SWZ_X)
523		{
524			if (mask & MASK_X) {
525				arith(fpc, sat, COS, dst, MASK_X,
526				      swz(src[0], X, X, X, X), none, none);
527			}
528			if (mask & MASK_Y) {
529				arith(fpc, sat, SIN, dst, MASK_Y,
530				      swz(src[0], X, X, X, X), none, none);
531			}
532		}
533		else
534		{
535			if (mask & MASK_Y) {
536				arith(fpc, sat, SIN, dst, MASK_Y,
537				      swz(src[0], X, X, X, X), none, none);
538			}
539			if (mask & MASK_X) {
540				arith(fpc, sat, COS, dst, MASK_X,
541				      swz(src[0], X, X, X, X), none, none);
542			}
543		}
544		break;
545	case TGSI_OPCODE_SIN:
546		arith(fpc, sat, SIN, dst, mask, src[0], none, none);
547		break;
548	case TGSI_OPCODE_SGE:
549		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
550		break;
551	case TGSI_OPCODE_SGT:
552		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
553		break;
554	case TGSI_OPCODE_SLT:
555		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
556		break;
557	case TGSI_OPCODE_SUB:
558		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
559		break;
560	case TGSI_OPCODE_TEX:
561		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
562		break;
563	case TGSI_OPCODE_TXB:
564		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
565		break;
566	case TGSI_OPCODE_TXP:
567		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
568		break;
569	case TGSI_OPCODE_XPD:
570		tmp = temp(fpc);
571		arith(fpc, 0, MUL, tmp, mask,
572		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
573		arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
574		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
575		      neg(tmp));
576		break;
577	default:
578		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
579		return FALSE;
580	}
581
582	return TRUE;
583}
584
585static boolean
586nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
587				const struct tgsi_full_declaration *fdec)
588{
589	int hw;
590
591	switch (fdec->Semantic.Name) {
592	case TGSI_SEMANTIC_POSITION:
593		hw = NV30_FP_OP_INPUT_SRC_POSITION;
594		break;
595	case TGSI_SEMANTIC_COLOR:
596		if (fdec->Semantic.Index == 0) {
597			hw = NV30_FP_OP_INPUT_SRC_COL0;
598		} else
599		if (fdec->Semantic.Index == 1) {
600			hw = NV30_FP_OP_INPUT_SRC_COL1;
601		} else {
602			NOUVEAU_ERR("bad colour semantic index\n");
603			return FALSE;
604		}
605		break;
606	case TGSI_SEMANTIC_FOG:
607		hw = NV30_FP_OP_INPUT_SRC_FOGC;
608		break;
609	case TGSI_SEMANTIC_GENERIC:
610		if (fdec->Semantic.Index <= 7) {
611			hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
612						     Index);
613		} else {
614			NOUVEAU_ERR("bad generic semantic index\n");
615			return FALSE;
616		}
617		break;
618	default:
619		NOUVEAU_ERR("bad input semantic\n");
620		return FALSE;
621	}
622
623	fpc->attrib_map[fdec->Range.First] = hw;
624	return TRUE;
625}
626
627static boolean
628nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
629				const struct tgsi_full_declaration *fdec)
630{
631	switch (fdec->Semantic.Name) {
632	case TGSI_SEMANTIC_POSITION:
633		fpc->depth_id = fdec->Range.First;
634		break;
635	case TGSI_SEMANTIC_COLOR:
636		fpc->colour_id = fdec->Range.First;
637		break;
638	default:
639		NOUVEAU_ERR("bad output semantic\n");
640		return FALSE;
641	}
642
643	return TRUE;
644}
645
646static boolean
647nv30_fragprog_prepare(struct nv30_fpc *fpc)
648{
649	struct tgsi_parse_context p;
650	/*int high_temp = -1, i;*/
651
652	tgsi_parse_init(&p, fpc->fp->pipe.tokens);
653	while (!tgsi_parse_end_of_tokens(&p)) {
654		const union tgsi_full_token *tok = &p.FullToken;
655
656		tgsi_parse_token(&p);
657		switch(tok->Token.Type) {
658		case TGSI_TOKEN_TYPE_DECLARATION:
659		{
660			const struct tgsi_full_declaration *fdec;
661			fdec = &p.FullToken.FullDeclaration;
662			switch (fdec->Declaration.File) {
663			case TGSI_FILE_INPUT:
664				if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
665					goto out_err;
666				break;
667			case TGSI_FILE_OUTPUT:
668				if (!nv30_fragprog_parse_decl_output(fpc, fdec))
669					goto out_err;
670				break;
671			/*case TGSI_FILE_TEMPORARY:
672				if (fdec->Range.Last > high_temp) {
673					high_temp =
674						fdec->Range.Last;
675				}
676				break;*/
677			default:
678				break;
679			}
680		}
681			break;
682		case TGSI_TOKEN_TYPE_IMMEDIATE:
683		{
684			struct tgsi_full_immediate *imm;
685			float vals[4];
686
687			imm = &p.FullToken.FullImmediate;
688			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
689			assert(fpc->nr_imm < MAX_IMM);
690
691			vals[0] = imm->u[0].Float;
692			vals[1] = imm->u[1].Float;
693			vals[2] = imm->u[2].Float;
694			vals[3] = imm->u[3].Float;
695			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
696		}
697			break;
698		default:
699			break;
700		}
701	}
702	tgsi_parse_free(&p);
703
704	/*if (++high_temp) {
705		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
706		for (i = 0; i < high_temp; i++)
707			fpc->r_temp[i] = temp(fpc);
708		fpc->r_temps_discard = 0;
709	}*/
710
711	return TRUE;
712
713out_err:
714	/*if (fpc->r_temp)
715		FREE(fpc->r_temp);*/
716	tgsi_parse_free(&p);
717	return FALSE;
718}
719
720static void
721nv30_fragprog_translate(struct nv30_context *nv30,
722			struct nv30_fragment_program *fp)
723{
724	struct tgsi_parse_context parse;
725	struct nv30_fpc *fpc = NULL;
726
727	tgsi_dump(fp->pipe.tokens,0);
728
729	fpc = CALLOC(1, sizeof(struct nv30_fpc));
730	if (!fpc)
731		return;
732	fpc->fp = fp;
733	fpc->high_temp = -1;
734	fpc->num_regs = 2;
735
736	if (!nv30_fragprog_prepare(fpc)) {
737		FREE(fpc);
738		return;
739	}
740
741	tgsi_parse_init(&parse, fp->pipe.tokens);
742
743	while (!tgsi_parse_end_of_tokens(&parse)) {
744		tgsi_parse_token(&parse);
745
746		switch (parse.FullToken.Token.Type) {
747		case TGSI_TOKEN_TYPE_INSTRUCTION:
748		{
749			const struct tgsi_full_instruction *finst;
750
751			finst = &parse.FullToken.FullInstruction;
752			if (!nv30_fragprog_parse_instruction(fpc, finst))
753				goto out_err;
754		}
755			break;
756		default:
757			break;
758		}
759	}
760
761	fp->fp_control |= (fpc->num_regs-1)/2;
762	fp->fp_reg_control = (1<<16)|0x4;
763
764	/* Terminate final instruction */
765	fp->insn[fpc->inst_offset] |= 0x00000001;
766
767	/* Append NOP + END instruction, may or may not be necessary. */
768	fpc->inst_offset = fp->insn_len;
769	grow_insns(fpc, 4);
770	fp->insn[fpc->inst_offset + 0] = 0x00000001;
771	fp->insn[fpc->inst_offset + 1] = 0x00000000;
772	fp->insn[fpc->inst_offset + 2] = 0x00000000;
773	fp->insn[fpc->inst_offset + 3] = 0x00000000;
774
775	fp->translated = TRUE;
776	fp->on_hw = FALSE;
777out_err:
778	tgsi_parse_free(&parse);
779	FREE(fpc);
780}
781
782static void
783nv30_fragprog_upload(struct nv30_context *nv30,
784		     struct nv30_fragment_program *fp)
785{
786	struct pipe_screen *pscreen = nv30->pipe.screen;
787	const uint32_t le = 1;
788	uint32_t *map;
789	int i;
790
791	map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
792
793#if 0
794	for (i = 0; i < fp->insn_len; i++) {
795		fflush(stdout); fflush(stderr);
796		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
797		fflush(stdout); fflush(stderr);
798	}
799#endif
800
801	if ((*(const uint8_t *)&le)) {
802		for (i = 0; i < fp->insn_len; i++) {
803			map[i] = fp->insn[i];
804		}
805	} else {
806		/* Weird swapping for big-endian chips */
807		for (i = 0; i < fp->insn_len; i++) {
808			map[i] = ((fp->insn[i] & 0xffff) << 16) |
809				  ((fp->insn[i] >> 16) & 0xffff);
810		}
811	}
812
813	pipe_buffer_unmap(pscreen, fp->buffer);
814}
815
816static boolean
817nv30_fragprog_validate(struct nv30_context *nv30)
818{
819	struct nv30_fragment_program *fp = nv30->fragprog;
820	struct pipe_buffer *constbuf =
821		nv30->constbuf[PIPE_SHADER_FRAGMENT];
822	struct pipe_screen *pscreen = nv30->pipe.screen;
823	struct nouveau_stateobj *so;
824	boolean new_consts = FALSE;
825	int i;
826
827	if (fp->translated)
828		goto update_constants;
829
830	/*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
831	nv30_fragprog_translate(nv30, fp);
832	if (!fp->translated) {
833		/*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
834		return FALSE;
835	}
836
837	fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
838	nv30_fragprog_upload(nv30, fp);
839
840	so = so_new(8, 1);
841	so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
842	so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
843		      NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
844		      NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
845		      NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
846	so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
847	so_data  (so, fp->fp_control);
848	so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
849	so_data  (so, fp->fp_reg_control);
850	so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
851	so_data  (so, fp->samplers);
852	so_ref(so, &fp->so);
853	so_ref(NULL, &so);
854
855update_constants:
856	if (fp->nr_consts) {
857		float *map;
858
859		map = pipe_buffer_map(pscreen, constbuf,
860				      PIPE_BUFFER_USAGE_CPU_READ);
861		for (i = 0; i < fp->nr_consts; i++) {
862			struct nv30_fragment_program_data *fpd = &fp->consts[i];
863			uint32_t *p = &fp->insn[fpd->offset];
864			uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
865
866			if (!memcmp(p, cb, 4 * sizeof(float)))
867				continue;
868			memcpy(p, cb, 4 * sizeof(float));
869			new_consts = TRUE;
870		}
871		pipe_buffer_unmap(pscreen, constbuf);
872
873		if (new_consts)
874			nv30_fragprog_upload(nv30, fp);
875	}
876
877	if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
878		so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
879		return TRUE;
880	}
881
882	return FALSE;
883}
884
885void
886nv30_fragprog_destroy(struct nv30_context *nv30,
887		      struct nv30_fragment_program *fp)
888{
889	if (fp->insn_len)
890		FREE(fp->insn);
891}
892
893struct nv30_state_entry nv30_state_fragprog = {
894	.validate = nv30_fragprog_validate,
895	.dirty = {
896		.pipe = NV30_NEW_FRAGPROG,
897		.hw = NV30_STATE_FRAGPROG
898	}
899};
900