/* nv30_fragprog.c, revision 7d6c8f980d1e23ad6f557d650e89c715861a3b0c */
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/tgsi_dump.h"
8#include "tgsi/tgsi_parse.h"
9#include "tgsi/tgsi_util.h"
10
11#include "nv30_context.h"
12
13#define SWZ_X 0
14#define SWZ_Y 1
15#define SWZ_Z 2
16#define SWZ_W 3
17#define MASK_X 1
18#define MASK_Y 2
19#define MASK_Z 4
20#define MASK_W 8
21#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
22#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
23#define DEF_CTEST NV30_FP_OP_COND_TR
24#include "nv30_shader.h"
25
26#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
27#define neg(s) nv30_sr_neg((s))
28#define abs(s) nv30_sr_abs((s))
29#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
30
31#define MAX_CONSTS 128
32#define MAX_IMM 32
33struct nv30_fpc {
34	struct nv30_fragment_program *fp;
35
36	uint attrib_map[PIPE_MAX_SHADER_INPUTS];
37
38	int high_temp;
39	int temp_temp_count;
40	int num_regs;
41
42	uint depth_id;
43	uint colour_id;
44
45	unsigned inst_offset;
46
47	struct {
48		int pipe;
49		float vals[4];
50	} consts[MAX_CONSTS];
51	int nr_consts;
52
53	struct nv30_sreg imm[MAX_IMM];
54	unsigned nr_imm;
55};
56
57static INLINE struct nv30_sreg
58temp(struct nv30_fpc *fpc)
59{
60	int idx;
61
62	idx  = fpc->temp_temp_count++;
63	idx += fpc->high_temp + 1;
64	return nv30_sr(NV30SR_TEMP, idx);
65}
66
67static INLINE struct nv30_sreg
68constant(struct nv30_fpc *fpc, int pipe, float vals[4])
69{
70	int idx;
71
72	if (fpc->nr_consts == MAX_CONSTS)
73		assert(0);
74	idx = fpc->nr_consts++;
75
76	fpc->consts[idx].pipe = pipe;
77	if (pipe == -1)
78		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
79	return nv30_sr(NV30SR_CONST, idx);
80}
81
/*
 * Emission shorthands: the opcode argument is pasted onto
 * NV30_FP_OP_OPCODE_.  tex() accepts s1/s2 only for symmetry with arith();
 * it does not forward them and instead passes the identifier `none`, which
 * must be in scope wherever the macro is expanded.
 */
#define arith(cc, s, o, d, m, s0, s1, s2)			\
	nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o,		\
		      (d), (m), (s0), (s1), (s2))
#define tex(cc, s, o, u, d, m, s0, s1, s2)			\
	nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u),	\
		    (d), (m), (s0), none, none)
88
89static void
90grow_insns(struct nv30_fpc *fpc, int size)
91{
92	struct nv30_fragment_program *fp = fpc->fp;
93
94	fp->insn_len += size;
95	fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
96}
97
98static void
99emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
100{
101	struct nv30_fragment_program *fp = fpc->fp;
102	uint32_t *hw = &fp->insn[fpc->inst_offset];
103	uint32_t sr = 0;
104
105	switch (src.type) {
106	case NV30SR_INPUT:
107		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
108		hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
109		break;
110	case NV30SR_OUTPUT:
111		sr |= NV30_FP_REG_SRC_HALF;
112		/* fall-through */
113	case NV30SR_TEMP:
114		sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
115		sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
116		break;
117	case NV30SR_CONST:
118		grow_insns(fpc, 4);
119		hw = &fp->insn[fpc->inst_offset];
120		if (fpc->consts[src.index].pipe >= 0) {
121			struct nv30_fragment_program_data *fpd;
122
123			fp->consts = realloc(fp->consts, ++fp->nr_consts *
124					     sizeof(*fpd));
125			fpd = &fp->consts[fp->nr_consts - 1];
126			fpd->offset = fpc->inst_offset + 4;
127			fpd->index = fpc->consts[src.index].pipe;
128			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
129		} else {
130			memcpy(&fp->insn[fpc->inst_offset + 4],
131				fpc->consts[src.index].vals,
132				sizeof(uint32_t) * 4);
133		}
134
135		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
136		break;
137	case NV30SR_NONE:
138		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
139		break;
140	default:
141		assert(0);
142	}
143
144	if (src.negate)
145		sr |= NV30_FP_REG_NEGATE;
146
147	if (src.abs)
148		hw[1] |= (1 << (29 + pos));
149
150	sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
151	       (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
152	       (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
153	       (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
154
155	hw[pos + 1] |= sr;
156}
157
158static void
159emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
160{
161	struct nv30_fragment_program *fp = fpc->fp;
162	uint32_t *hw = &fp->insn[fpc->inst_offset];
163
164	switch (dst.type) {
165	case NV30SR_TEMP:
166		if (fpc->num_regs < (dst.index + 1))
167			fpc->num_regs = dst.index + 1;
168		break;
169	case NV30SR_OUTPUT:
170		if (dst.index == 1) {
171			fp->fp_control |= 0xe;
172		} else {
173			hw[0] |= NV30_FP_OP_OUT_REG_HALF;
174		}
175		break;
176	case NV30SR_NONE:
177		hw[0] |= (1 << 30);
178		break;
179	default:
180		assert(0);
181	}
182
183	hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
184}
185
186static void
187nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
188	      struct nv30_sreg dst, int mask,
189	      struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
190{
191	struct nv30_fragment_program *fp = fpc->fp;
192	uint32_t *hw;
193
194	fpc->inst_offset = fp->insn_len;
195	grow_insns(fpc, 4);
196	hw = &fp->insn[fpc->inst_offset];
197	memset(hw, 0, sizeof(uint32_t) * 4);
198
199	if (op == NV30_FP_OP_OPCODE_KIL)
200		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
201	hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
202	hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
203	hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
204
205	if (sat)
206		hw[0] |= NV30_FP_OP_OUT_SAT;
207
208	if (dst.cc_update)
209		hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
210	hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
211	hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
212		  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
213		  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
214		  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
215
216	emit_dst(fpc, dst);
217	emit_src(fpc, 0, s0);
218	emit_src(fpc, 1, s1);
219	emit_src(fpc, 2, s2);
220}
221
222static void
223nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
224	    struct nv30_sreg dst, int mask,
225	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
226{
227	struct nv30_fragment_program *fp = fpc->fp;
228
229	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
230
231	fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
232	fp->samplers |= (1 << unit);
233}
234
235static INLINE struct nv30_sreg
236tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
237{
238	struct nv30_sreg src;
239
240	switch (fsrc->SrcRegister.File) {
241	case TGSI_FILE_INPUT:
242		src = nv30_sr(NV30SR_INPUT,
243			      fpc->attrib_map[fsrc->SrcRegister.Index]);
244		break;
245	case TGSI_FILE_CONSTANT:
246		src = constant(fpc, fsrc->SrcRegister.Index, NULL);
247		break;
248	case TGSI_FILE_IMMEDIATE:
249		assert(fsrc->SrcRegister.Index < fpc->nr_imm);
250		src = fpc->imm[fsrc->SrcRegister.Index];
251		break;
252	case TGSI_FILE_TEMPORARY:
253		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
254		if (fpc->high_temp < src.index)
255			fpc->high_temp = src.index;
256		break;
257	/* This is clearly insane, but gallium hands us shaders like this.
258	 * Luckily fragprog results are just temp regs..
259	 */
260	case TGSI_FILE_OUTPUT:
261		if (fsrc->SrcRegister.Index == fpc->colour_id)
262			return nv30_sr(NV30SR_OUTPUT, 0);
263		else
264			return nv30_sr(NV30SR_OUTPUT, 1);
265		break;
266	default:
267		NOUVEAU_ERR("bad src file\n");
268		break;
269	}
270
271	src.abs = fsrc->SrcRegister.Absolute;
272	src.negate = fsrc->SrcRegister.Negate;
273	src.swz[0] = fsrc->SrcRegister.SwizzleX;
274	src.swz[1] = fsrc->SrcRegister.SwizzleY;
275	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
276	src.swz[3] = fsrc->SrcRegister.SwizzleW;
277	return src;
278}
279
280static INLINE struct nv30_sreg
281tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
282	int idx;
283
284	switch (fdst->DstRegister.File) {
285	case TGSI_FILE_OUTPUT:
286		if (fdst->DstRegister.Index == fpc->colour_id)
287			return nv30_sr(NV30SR_OUTPUT, 0);
288		else
289			return nv30_sr(NV30SR_OUTPUT, 1);
290		break;
291	case TGSI_FILE_TEMPORARY:
292		idx = fdst->DstRegister.Index + 1;
293		if (fpc->high_temp < idx)
294			fpc->high_temp = idx;
295		return nv30_sr(NV30SR_TEMP, idx);
296	case TGSI_FILE_NULL:
297		return nv30_sr(NV30SR_NONE, 0);
298	default:
299		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
300		return nv30_sr(NV30SR_NONE, 0);
301	}
302}
303
304static INLINE int
305tgsi_mask(uint tgsi)
306{
307	int mask = 0;
308
309	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
310	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
311	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
312	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
313	return mask;
314}
315
316static boolean
317src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
318	       struct nv30_sreg *src)
319{
320	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
321	struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
322	uint mask = 0;
323	uint c;
324
325	for (c = 0; c < 4; c++) {
326		switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
327		case TGSI_SWIZZLE_X:
328		case TGSI_SWIZZLE_Y:
329		case TGSI_SWIZZLE_Z:
330		case TGSI_SWIZZLE_W:
331			mask |= (1 << c);
332			break;
333		default:
334			assert(0);
335		}
336	}
337
338	if (mask == MASK_ALL)
339		return TRUE;
340
341	*src = temp(fpc);
342
343	if (mask)
344		arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
345
346	return FALSE;
347}
348
349static boolean
350nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
351				const struct tgsi_full_instruction *finst)
352{
353	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
354	struct nv30_sreg src[3], dst, tmp;
355	int mask, sat, unit = 0;
356	int ai = -1, ci = -1;
357	int i;
358
359	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
360		return TRUE;
361
362	fpc->temp_temp_count = 0;
363	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
364		const struct tgsi_full_src_register *fsrc;
365
366		fsrc = &finst->Src[i];
367		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
368			src[i] = tgsi_src(fpc, fsrc);
369		}
370	}
371
372	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
373		const struct tgsi_full_src_register *fsrc;
374
375		fsrc = &finst->Src[i];
376
377		switch (fsrc->SrcRegister.File) {
378		case TGSI_FILE_INPUT:
379		case TGSI_FILE_CONSTANT:
380		case TGSI_FILE_TEMPORARY:
381			if (!src_native_swz(fpc, fsrc, &src[i]))
382				continue;
383			break;
384		default:
385			break;
386		}
387
388		switch (fsrc->SrcRegister.File) {
389		case TGSI_FILE_INPUT:
390			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
391				ai = fsrc->SrcRegister.Index;
392				src[i] = tgsi_src(fpc, fsrc);
393			} else {
394				NOUVEAU_MSG("extra src attr %d\n",
395					 fsrc->SrcRegister.Index);
396				src[i] = temp(fpc);
397				arith(fpc, 0, MOV, src[i], MASK_ALL,
398				      tgsi_src(fpc, fsrc), none, none);
399			}
400			break;
401		case TGSI_FILE_CONSTANT:
402		case TGSI_FILE_IMMEDIATE:
403			if (ci == -1 || ci == fsrc->SrcRegister.Index) {
404				ci = fsrc->SrcRegister.Index;
405				src[i] = tgsi_src(fpc, fsrc);
406			} else {
407				src[i] = temp(fpc);
408				arith(fpc, 0, MOV, src[i], MASK_ALL,
409				      tgsi_src(fpc, fsrc), none, none);
410			}
411			break;
412		case TGSI_FILE_TEMPORARY:
413			/* handled above */
414			break;
415		case TGSI_FILE_SAMPLER:
416			unit = fsrc->SrcRegister.Index;
417			break;
418		case TGSI_FILE_OUTPUT:
419			break;
420		default:
421			NOUVEAU_ERR("bad src file\n");
422			return FALSE;
423		}
424	}
425
426	dst  = tgsi_dst(fpc, &finst->Dst[0]);
427	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
428	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
429
430	switch (finst->Instruction.Opcode) {
431	case TGSI_OPCODE_ABS:
432		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
433		break;
434	case TGSI_OPCODE_ADD:
435		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
436		break;
437	case TGSI_OPCODE_CMP:
438		tmp = temp(fpc);
439		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
440		tmp.cc_update = 1;
441		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
442		dst.cc_test = NV30_VP_INST_COND_LT;
443		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
444		break;
445	case TGSI_OPCODE_COS:
446		arith(fpc, sat, COS, dst, mask, src[0], none, none);
447		break;
448	case TGSI_OPCODE_DP3:
449		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
450		break;
451	case TGSI_OPCODE_DP4:
452		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
453		break;
454	case TGSI_OPCODE_DPH:
455		tmp = temp(fpc);
456		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
457		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
458		      swz(src[1], W, W, W, W), none);
459		break;
460	case TGSI_OPCODE_DST:
461		arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
462		break;
463	case TGSI_OPCODE_EX2:
464		arith(fpc, sat, EX2, dst, mask, src[0], none, none);
465		break;
466	case TGSI_OPCODE_FLR:
467		arith(fpc, sat, FLR, dst, mask, src[0], none, none);
468		break;
469	case TGSI_OPCODE_FRC:
470		arith(fpc, sat, FRC, dst, mask, src[0], none, none);
471		break;
472	case TGSI_OPCODE_KILP:
473		arith(fpc, 0, KIL, none, 0, none, none, none);
474		break;
475	case TGSI_OPCODE_KIL:
476		dst = nv30_sr(NV30SR_NONE, 0);
477		dst.cc_update = 1;
478		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
479		dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
480		arith(fpc, 0, KIL, dst, 0, none, none, none);
481		break;
482	case TGSI_OPCODE_LG2:
483		arith(fpc, sat, LG2, dst, mask, src[0], none, none);
484		break;
485//	case TGSI_OPCODE_LIT:
486	case TGSI_OPCODE_LRP:
487		arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
488		break;
489	case TGSI_OPCODE_MAD:
490		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
491		break;
492	case TGSI_OPCODE_MAX:
493		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
494		break;
495	case TGSI_OPCODE_MIN:
496		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
497		break;
498	case TGSI_OPCODE_MOV:
499		arith(fpc, sat, MOV, dst, mask, src[0], none, none);
500		break;
501	case TGSI_OPCODE_MUL:
502		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
503		break;
504	case TGSI_OPCODE_POW:
505		arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
506		break;
507	case TGSI_OPCODE_RCP:
508		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
509		break;
510	case TGSI_OPCODE_RET:
511		assert(0);
512		break;
513	case TGSI_OPCODE_RFL:
514		arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
515		break;
516	case TGSI_OPCODE_RSQ:
517		arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
518		break;
519	case TGSI_OPCODE_SCS:
520		if (mask & MASK_X) {
521			arith(fpc, sat, COS, dst, MASK_X,
522			      swz(src[0], X, X, X, X), none, none);
523		}
524		if (mask & MASK_Y) {
525			arith(fpc, sat, SIN, dst, MASK_Y,
526			      swz(src[0], X, X, X, X), none, none);
527		}
528		break;
529	case TGSI_OPCODE_SIN:
530		arith(fpc, sat, SIN, dst, mask, src[0], none, none);
531		break;
532	case TGSI_OPCODE_SGE:
533		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
534		break;
535	case TGSI_OPCODE_SGT:
536		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
537		break;
538	case TGSI_OPCODE_SLT:
539		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
540		break;
541	case TGSI_OPCODE_SUB:
542		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
543		break;
544	case TGSI_OPCODE_TEX:
545		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
546		break;
547	case TGSI_OPCODE_TXB:
548		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
549		break;
550	case TGSI_OPCODE_TXP:
551		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
552		break;
553	case TGSI_OPCODE_XPD:
554		tmp = temp(fpc);
555		arith(fpc, 0, MUL, tmp, mask,
556		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
557		arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
558		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
559		      neg(tmp));
560		break;
561	default:
562		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
563		return FALSE;
564	}
565
566	return TRUE;
567}
568
569static boolean
570nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
571				const struct tgsi_full_declaration *fdec)
572{
573	int hw;
574
575	switch (fdec->Semantic.Name) {
576	case TGSI_SEMANTIC_POSITION:
577		hw = NV30_FP_OP_INPUT_SRC_POSITION;
578		break;
579	case TGSI_SEMANTIC_COLOR:
580		if (fdec->Semantic.Index == 0) {
581			hw = NV30_FP_OP_INPUT_SRC_COL0;
582		} else
583		if (fdec->Semantic.Index == 1) {
584			hw = NV30_FP_OP_INPUT_SRC_COL1;
585		} else {
586			NOUVEAU_ERR("bad colour semantic index\n");
587			return FALSE;
588		}
589		break;
590	case TGSI_SEMANTIC_FOG:
591		hw = NV30_FP_OP_INPUT_SRC_FOGC;
592		break;
593	case TGSI_SEMANTIC_GENERIC:
594		if (fdec->Semantic.Index <= 7) {
595			hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
596						     Index);
597		} else {
598			NOUVEAU_ERR("bad generic semantic index\n");
599			return FALSE;
600		}
601		break;
602	default:
603		NOUVEAU_ERR("bad input semantic\n");
604		return FALSE;
605	}
606
607	fpc->attrib_map[fdec->DeclarationRange.First] = hw;
608	return TRUE;
609}
610
611static boolean
612nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
613				const struct tgsi_full_declaration *fdec)
614{
615	switch (fdec->Semantic.Name) {
616	case TGSI_SEMANTIC_POSITION:
617		fpc->depth_id = fdec->DeclarationRange.First;
618		break;
619	case TGSI_SEMANTIC_COLOR:
620		fpc->colour_id = fdec->DeclarationRange.First;
621		break;
622	default:
623		NOUVEAU_ERR("bad output semantic\n");
624		return FALSE;
625	}
626
627	return TRUE;
628}
629
630static boolean
631nv30_fragprog_prepare(struct nv30_fpc *fpc)
632{
633	struct tgsi_parse_context p;
634	/*int high_temp = -1, i;*/
635
636	tgsi_parse_init(&p, fpc->fp->pipe.tokens);
637	while (!tgsi_parse_end_of_tokens(&p)) {
638		const union tgsi_full_token *tok = &p.FullToken;
639
640		tgsi_parse_token(&p);
641		switch(tok->Token.Type) {
642		case TGSI_TOKEN_TYPE_DECLARATION:
643		{
644			const struct tgsi_full_declaration *fdec;
645			fdec = &p.FullToken.FullDeclaration;
646			switch (fdec->Declaration.File) {
647			case TGSI_FILE_INPUT:
648				if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
649					goto out_err;
650				break;
651			case TGSI_FILE_OUTPUT:
652				if (!nv30_fragprog_parse_decl_output(fpc, fdec))
653					goto out_err;
654				break;
655			/*case TGSI_FILE_TEMPORARY:
656				if (fdec->DeclarationRange.Last > high_temp) {
657					high_temp =
658						fdec->DeclarationRange.Last;
659				}
660				break;*/
661			default:
662				break;
663			}
664		}
665			break;
666		case TGSI_TOKEN_TYPE_IMMEDIATE:
667		{
668			struct tgsi_full_immediate *imm;
669			float vals[4];
670
671			imm = &p.FullToken.FullImmediate;
672			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
673			assert(fpc->nr_imm < MAX_IMM);
674
675			vals[0] = imm->u[0].Float;
676			vals[1] = imm->u[1].Float;
677			vals[2] = imm->u[2].Float;
678			vals[3] = imm->u[3].Float;
679			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
680		}
681			break;
682		default:
683			break;
684		}
685	}
686	tgsi_parse_free(&p);
687
688	/*if (++high_temp) {
689		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
690		for (i = 0; i < high_temp; i++)
691			fpc->r_temp[i] = temp(fpc);
692		fpc->r_temps_discard = 0;
693	}*/
694
695	return TRUE;
696
697out_err:
698	/*if (fpc->r_temp)
699		FREE(fpc->r_temp);*/
700	tgsi_parse_free(&p);
701	return FALSE;
702}
703
704static void
705nv30_fragprog_translate(struct nv30_context *nv30,
706			struct nv30_fragment_program *fp)
707{
708	struct tgsi_parse_context parse;
709	struct nv30_fpc *fpc = NULL;
710
711	tgsi_dump(fp->pipe.tokens,0);
712
713	fpc = CALLOC(1, sizeof(struct nv30_fpc));
714	if (!fpc)
715		return;
716	fpc->fp = fp;
717	fpc->high_temp = -1;
718	fpc->num_regs = 2;
719
720	if (!nv30_fragprog_prepare(fpc)) {
721		FREE(fpc);
722		return;
723	}
724
725	tgsi_parse_init(&parse, fp->pipe.tokens);
726
727	while (!tgsi_parse_end_of_tokens(&parse)) {
728		tgsi_parse_token(&parse);
729
730		switch (parse.FullToken.Token.Type) {
731		case TGSI_TOKEN_TYPE_INSTRUCTION:
732		{
733			const struct tgsi_full_instruction *finst;
734
735			finst = &parse.FullToken.FullInstruction;
736			if (!nv30_fragprog_parse_instruction(fpc, finst))
737				goto out_err;
738		}
739			break;
740		default:
741			break;
742		}
743	}
744
745	fp->fp_control |= (fpc->num_regs-1)/2;
746	fp->fp_reg_control = (1<<16)|0x4;
747
748	/* Terminate final instruction */
749	fp->insn[fpc->inst_offset] |= 0x00000001;
750
751	/* Append NOP + END instruction, may or may not be necessary. */
752	fpc->inst_offset = fp->insn_len;
753	grow_insns(fpc, 4);
754	fp->insn[fpc->inst_offset + 0] = 0x00000001;
755	fp->insn[fpc->inst_offset + 1] = 0x00000000;
756	fp->insn[fpc->inst_offset + 2] = 0x00000000;
757	fp->insn[fpc->inst_offset + 3] = 0x00000000;
758
759	fp->translated = TRUE;
760	fp->on_hw = FALSE;
761out_err:
762	tgsi_parse_free(&parse);
763	FREE(fpc);
764}
765
766static void
767nv30_fragprog_upload(struct nv30_context *nv30,
768		     struct nv30_fragment_program *fp)
769{
770	struct pipe_screen *pscreen = nv30->pipe.screen;
771	const uint32_t le = 1;
772	uint32_t *map;
773	int i;
774
775	map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
776
777#if 0
778	for (i = 0; i < fp->insn_len; i++) {
779		fflush(stdout); fflush(stderr);
780		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
781		fflush(stdout); fflush(stderr);
782	}
783#endif
784
785	if ((*(const uint8_t *)&le)) {
786		for (i = 0; i < fp->insn_len; i++) {
787			map[i] = fp->insn[i];
788		}
789	} else {
790		/* Weird swapping for big-endian chips */
791		for (i = 0; i < fp->insn_len; i++) {
792			map[i] = ((fp->insn[i] & 0xffff) << 16) |
793				  ((fp->insn[i] >> 16) & 0xffff);
794		}
795	}
796
797	pipe_buffer_unmap(pscreen, fp->buffer);
798}
799
800static boolean
801nv30_fragprog_validate(struct nv30_context *nv30)
802{
803	struct nv30_fragment_program *fp = nv30->fragprog;
804	struct pipe_buffer *constbuf =
805		nv30->constbuf[PIPE_SHADER_FRAGMENT];
806	struct pipe_screen *pscreen = nv30->pipe.screen;
807	struct nouveau_stateobj *so;
808	boolean new_consts = FALSE;
809	int i;
810
811	if (fp->translated)
812		goto update_constants;
813
814	/*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
815	nv30_fragprog_translate(nv30, fp);
816	if (!fp->translated) {
817		/*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
818		return FALSE;
819	}
820
821	fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
822	nv30_fragprog_upload(nv30, fp);
823
824	so = so_new(8, 1);
825	so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
826	so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
827		      NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
828		      NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
829		      NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
830	so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
831	so_data  (so, fp->fp_control);
832	so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
833	so_data  (so, fp->fp_reg_control);
834	so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
835	so_data  (so, fp->samplers);
836	so_ref(so, &fp->so);
837	so_ref(NULL, &so);
838
839update_constants:
840	if (fp->nr_consts) {
841		float *map;
842
843		map = pipe_buffer_map(pscreen, constbuf,
844				      PIPE_BUFFER_USAGE_CPU_READ);
845		for (i = 0; i < fp->nr_consts; i++) {
846			struct nv30_fragment_program_data *fpd = &fp->consts[i];
847			uint32_t *p = &fp->insn[fpd->offset];
848			uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
849
850			if (!memcmp(p, cb, 4 * sizeof(float)))
851				continue;
852			memcpy(p, cb, 4 * sizeof(float));
853			new_consts = TRUE;
854		}
855		pipe_buffer_unmap(pscreen, constbuf);
856
857		if (new_consts)
858			nv30_fragprog_upload(nv30, fp);
859	}
860
861	if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
862		so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
863		return TRUE;
864	}
865
866	return FALSE;
867}
868
869void
870nv30_fragprog_destroy(struct nv30_context *nv30,
871		      struct nv30_fragment_program *fp)
872{
873	if (fp->insn_len)
874		FREE(fp->insn);
875}
876
877struct nv30_state_entry nv30_state_fragprog = {
878	.validate = nv30_fragprog_validate,
879	.dirty = {
880		.pipe = NV30_NEW_FRAGPROG,
881		.hw = NV30_STATE_FRAGPROG
882	}
883};
884