/* nv30_vertprog.c revision fd31f92cea0ce8613a22d8f4b3c75b340bcc5689 */
1#include "pipe/p_context.h"
2#include "pipe/p_defines.h"
3#include "pipe/p_state.h"
4#include "pipe/p_inlines.h"
5
6#include "pipe/p_shader_tokens.h"
7#include "tgsi/tgsi_parse.h"
8#include "tgsi/tgsi_dump.h"
9
10#include "nv30_context.h"
11#include "nv30_state.h"
12
/* TODO (at least...):
 *  1. Indexed constants + ARL
 *  2. Arbitrary swizzle/negation
 *  3. NV_vp11, NV_vp2, NV_vp3 features
 *       - extra arithmetic opcodes
 *       - branching
 *       - texture sampling
 *       - indexed attributes
 *       - indexed results
 *  4. bugs
 */
24
25#define SWZ_X 0
26#define SWZ_Y 1
27#define SWZ_Z 2
28#define SWZ_W 3
29#define MASK_X 8
30#define MASK_Y 4
31#define MASK_Z 2
32#define MASK_W 1
33#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
34#define DEF_SCALE 0
35#define DEF_CTEST 0
36#include "nv30_shader.h"
37
38#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
39#define neg(s) nv30_sr_neg((s))
40#define abs(s) nv30_sr_abs((s))
41
42struct nv30_vpc {
43	struct nv30_vertex_program *vp;
44
45	struct nv30_vertex_program_exec *vpi;
46
47	unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
48
49	int high_temp;
50	int temp_temp_count;
51
52	struct nv30_sreg *imm;
53	unsigned nr_imm;
54};
55
56static struct nv30_sreg
57temp(struct nv30_vpc *vpc)
58{
59	int idx;
60
61	idx  = vpc->temp_temp_count++;
62	idx += vpc->high_temp + 1;
63	return nv30_sr(NV30SR_TEMP, idx);
64}
65
66static struct nv30_sreg
67constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
68{
69	struct nv30_vertex_program *vp = vpc->vp;
70	struct nv30_vertex_program_data *vpd;
71	int idx;
72
73	if (pipe >= 0) {
74		for (idx = 0; idx < vp->nr_consts; idx++) {
75			if (vp->consts[idx].index == pipe)
76				return nv30_sr(NV30SR_CONST, idx);
77		}
78	}
79
80	idx = vp->nr_consts++;
81	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
82	vpd = &vp->consts[idx];
83
84	vpd->index = pipe;
85	vpd->value[0] = x;
86	vpd->value[1] = y;
87	vpd->value[2] = z;
88	vpd->value[3] = w;
89	return nv30_sr(NV30SR_CONST, idx);
90}
91
/* Shorthand for nv30_vp_arith(): the opcode argument "o" is given without
 * its NV30_VP_INST_ prefix, e.g. arith(vpc, 0, OP_MOV, ...).
 */
#define arith(cc, s, o, d, m, s0, s1, s2)                        \
	nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m),     \
		      (s0), (s1), (s2))
94
95static void
96emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
97{
98	struct nv30_vertex_program *vp = vpc->vp;
99	uint32_t sr = 0;
100
101	switch (src.type) {
102	case NV30SR_TEMP:
103		sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
104		sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
105		break;
106	case NV30SR_INPUT:
107		sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
108		       NV30_VP_SRC_REG_TYPE_SHIFT);
109		vp->ir |= (1 << src.index);
110		hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
111		break;
112	case NV30SR_CONST:
113		sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
114		       NV30_VP_SRC_REG_TYPE_SHIFT);
115		assert(vpc->vpi->const_index == -1 ||
116		       vpc->vpi->const_index == src.index);
117		vpc->vpi->const_index = src.index;
118		break;
119	case NV30SR_NONE:
120		sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
121		       NV30_VP_SRC_REG_TYPE_SHIFT);
122		break;
123	default:
124		assert(0);
125	}
126
127	if (src.negate)
128		sr |= NV30_VP_SRC_NEGATE;
129
130	if (src.abs)
131		hw[0] |= (1 << (21 + pos));
132
133	sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
134	       (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
135	       (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
136	       (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
137
138/*
139 * |VVV|
140 * d�.�b
141 *  \u/
142 *
143 */
144
145	switch (pos) {
146	case 0:
147		hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
148			  NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
149		hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
150			  NV30_VP_INST_SRC0L_SHIFT;
151		break;
152	case 1:
153		hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
154		break;
155	case 2:
156		hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
157			  NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
158		hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
159			  NV30_VP_INST_SRC2L_SHIFT;
160		break;
161	default:
162		assert(0);
163	}
164}
165
166static void
167emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
168{
169	struct nv30_vertex_program *vp = vpc->vp;
170
171	switch (dst.type) {
172	case NV30SR_TEMP:
173		hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
174		break;
175	case NV30SR_OUTPUT:
176		switch (dst.index) {
177		case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
178		case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
179		case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
180		case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
181		case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
182		case NV30_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break;
183		case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
184		case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
185		case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
186		case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
187		case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
188		case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
189		case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
190		case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
191		default:
192			break;
193		}
194
195		hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
196		hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
197
198		/*XXX: no way this is entirely correct, someone needs to
199		 *     figure out what exactly it is.
200		 */
201		hw[3] |= 0x800;
202		break;
203	default:
204		assert(0);
205	}
206}
207
208static void
209nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
210	      struct nv30_sreg dst, int mask,
211	      struct nv30_sreg s0, struct nv30_sreg s1,
212	      struct nv30_sreg s2)
213{
214	struct nv30_vertex_program *vp = vpc->vp;
215	uint32_t *hw;
216
217	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
218	vpc->vpi = &vp->insns[vp->nr_insns - 1];
219	memset(vpc->vpi, 0, sizeof(*vpc->vpi));
220	vpc->vpi->const_index = -1;
221
222	hw = vpc->vpi->data;
223
224	hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
225	hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
226		  (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
227		  (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
228		  (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
229
230	hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
231//	hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
232//	hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
233
234	if (dst.type == NV30SR_OUTPUT) {
235		if (slot)
236			hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
237		else
238			hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
239	} else {
240		if (slot)
241			hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
242		else
243			hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
244	}
245
246	emit_dst(vpc, hw, slot, dst);
247	emit_src(vpc, hw, 0, s0);
248	emit_src(vpc, hw, 1, s1);
249	emit_src(vpc, hw, 2, s2);
250}
251
252static INLINE struct nv30_sreg
253tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
254	struct nv30_sreg src;
255
256	switch (fsrc->SrcRegister.File) {
257	case TGSI_FILE_INPUT:
258		src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
259		break;
260	case TGSI_FILE_CONSTANT:
261		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
262		break;
263	case TGSI_FILE_IMMEDIATE:
264		src = vpc->imm[fsrc->SrcRegister.Index];
265		break;
266	case TGSI_FILE_TEMPORARY:
267		if (vpc->high_temp < fsrc->SrcRegister.Index)
268			vpc->high_temp = fsrc->SrcRegister.Index;
269		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
270		break;
271	default:
272		NOUVEAU_ERR("bad src file\n");
273		break;
274	}
275
276	src.abs = fsrc->SrcRegisterExtMod.Absolute;
277	src.negate = fsrc->SrcRegister.Negate;
278	src.swz[0] = fsrc->SrcRegister.SwizzleX;
279	src.swz[1] = fsrc->SrcRegister.SwizzleY;
280	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
281	src.swz[3] = fsrc->SrcRegister.SwizzleW;
282	return src;
283}
284
285static INLINE struct nv30_sreg
286tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
287	struct nv30_sreg dst;
288
289	switch (fdst->DstRegister.File) {
290	case TGSI_FILE_OUTPUT:
291		dst = nv30_sr(NV30SR_OUTPUT,
292			      vpc->output_map[fdst->DstRegister.Index]);
293
294		break;
295	case TGSI_FILE_TEMPORARY:
296		dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
297		if (vpc->high_temp < dst.index)
298			vpc->high_temp = dst.index;
299		break;
300	default:
301		NOUVEAU_ERR("bad dst file\n");
302		break;
303	}
304
305	return dst;
306}
307
308static INLINE int
309tgsi_mask(uint tgsi)
310{
311	int mask = 0;
312
313	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
314	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
315	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
316	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
317	return mask;
318}
319
320static boolean
321nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
322				const struct tgsi_full_instruction *finst)
323{
324	struct nv30_sreg src[3], dst, tmp;
325	struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
326	int mask;
327	int ai = -1, ci = -1;
328	int i;
329
330	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
331		return TRUE;
332
333	vpc->temp_temp_count = 0;
334	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
335		const struct tgsi_full_src_register *fsrc;
336
337		fsrc = &finst->FullSrcRegisters[i];
338		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
339			src[i] = tgsi_src(vpc, fsrc);
340		}
341	}
342
343	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
344		const struct tgsi_full_src_register *fsrc;
345
346		fsrc = &finst->FullSrcRegisters[i];
347		switch (fsrc->SrcRegister.File) {
348		case TGSI_FILE_INPUT:
349			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
350				ai = fsrc->SrcRegister.Index;
351				src[i] = tgsi_src(vpc, fsrc);
352			} else {
353				src[i] = temp(vpc);
354				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
355				      tgsi_src(vpc, fsrc), none, none);
356			}
357			break;
358		/*XXX: index comparison is broken now that consts come from
359		 *     two different register files.
360		 */
361		case TGSI_FILE_CONSTANT:
362		case TGSI_FILE_IMMEDIATE:
363			if (ci == -1 || ci == fsrc->SrcRegister.Index) {
364				ci = fsrc->SrcRegister.Index;
365				src[i] = tgsi_src(vpc, fsrc);
366			} else {
367				src[i] = temp(vpc);
368				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
369				      tgsi_src(vpc, fsrc), none, none);
370			}
371			break;
372		case TGSI_FILE_TEMPORARY:
373			/* handled above */
374			break;
375		default:
376			NOUVEAU_ERR("bad src file\n");
377			return FALSE;
378		}
379	}
380
381	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
382	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
383
384	switch (finst->Instruction.Opcode) {
385	case TGSI_OPCODE_ABS:
386		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
387		break;
388	case TGSI_OPCODE_ADD:
389		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
390		break;
391	case TGSI_OPCODE_ARL:
392		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
393		break;
394	case TGSI_OPCODE_DP3:
395		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
396		break;
397	case TGSI_OPCODE_DP4:
398		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
399		break;
400	case TGSI_OPCODE_DPH:
401		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
402		break;
403	case TGSI_OPCODE_DST:
404		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
405		break;
406	case TGSI_OPCODE_EX2:
407		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
408		break;
409	case TGSI_OPCODE_EXP:
410		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
411		break;
412	case TGSI_OPCODE_FLR:
413		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
414		break;
415	case TGSI_OPCODE_FRC:
416		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
417		break;
418	case TGSI_OPCODE_LG2:
419		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
420		break;
421	case TGSI_OPCODE_LIT:
422		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
423		break;
424	case TGSI_OPCODE_LOG:
425		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
426		break;
427	case TGSI_OPCODE_MAD:
428		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
429		break;
430	case TGSI_OPCODE_MAX:
431		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
432		break;
433	case TGSI_OPCODE_MIN:
434		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
435		break;
436	case TGSI_OPCODE_MOV:
437		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
438		break;
439	case TGSI_OPCODE_MUL:
440		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
441		break;
442	case TGSI_OPCODE_POW:
443		tmp = temp(vpc);
444		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
445		      swz(src[0], X, X, X, X));
446		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
447		      swz(src[1], X, X, X, X), none);
448		arith(vpc, 1, OP_EX2, dst, mask, none, none,
449		      swz(tmp, X, X, X, X));
450		break;
451	case TGSI_OPCODE_RCP:
452		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
453		break;
454	case TGSI_OPCODE_RET:
455		break;
456	case TGSI_OPCODE_RSQ:
457		arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
458		break;
459	case TGSI_OPCODE_SGE:
460		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
461		break;
462	case TGSI_OPCODE_SGT:
463		arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
464		break;
465	case TGSI_OPCODE_SLT:
466		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
467		break;
468	case TGSI_OPCODE_SUB:
469		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
470		break;
471	case TGSI_OPCODE_XPD:
472		tmp = temp(vpc);
473		arith(vpc, 0, OP_MUL, tmp, mask,
474		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
475		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
476		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
477		      neg(tmp));
478		break;
479	default:
480		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
481		return FALSE;
482	}
483
484	return TRUE;
485}
486
487static boolean
488nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
489				const struct tgsi_full_declaration *fdec)
490{
491	int hw;
492
493	switch (fdec->Semantic.SemanticName) {
494	case TGSI_SEMANTIC_POSITION:
495		hw = NV30_VP_INST_DEST_POS;
496		break;
497	case TGSI_SEMANTIC_COLOR:
498		if (fdec->Semantic.SemanticIndex == 0) {
499			hw = NV30_VP_INST_DEST_COL0;
500		} else
501		if (fdec->Semantic.SemanticIndex == 1) {
502			hw = NV30_VP_INST_DEST_COL1;
503		} else {
504			NOUVEAU_ERR("bad colour semantic index\n");
505			return FALSE;
506		}
507		break;
508	case TGSI_SEMANTIC_BCOLOR:
509		if (fdec->Semantic.SemanticIndex == 0) {
510			hw = NV30_VP_INST_DEST_BFC0;
511		} else
512		if (fdec->Semantic.SemanticIndex == 1) {
513			hw = NV30_VP_INST_DEST_BFC1;
514		} else {
515			NOUVEAU_ERR("bad bcolour semantic index\n");
516			return FALSE;
517		}
518		break;
519	case TGSI_SEMANTIC_FOG:
520		hw = NV30_VP_INST_DEST_FOGC;
521		break;
522	case TGSI_SEMANTIC_PSIZE:
523		hw = NV30_VP_INST_DEST_PSZ;
524		break;
525	case TGSI_SEMANTIC_GENERIC:
526		if (fdec->Semantic.SemanticIndex <= 7) {
527			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
528		} else {
529			NOUVEAU_ERR("bad generic semantic index\n");
530			return FALSE;
531		}
532		break;
533	default:
534		NOUVEAU_ERR("bad output semantic\n");
535		return FALSE;
536	}
537
538	vpc->output_map[fdec->DeclarationRange.First] = hw;
539	return TRUE;
540}
541
542static boolean
543nv30_vertprog_prepare(struct nv30_vpc *vpc)
544{
545	struct tgsi_parse_context p;
546	int nr_imm = 0;
547
548	tgsi_parse_init(&p, vpc->vp->pipe.tokens);
549	while (!tgsi_parse_end_of_tokens(&p)) {
550		const union tgsi_full_token *tok = &p.FullToken;
551
552		tgsi_parse_token(&p);
553		switch(tok->Token.Type) {
554		case TGSI_TOKEN_TYPE_IMMEDIATE:
555			nr_imm++;
556			break;
557		default:
558			break;
559		}
560	}
561	tgsi_parse_free(&p);
562
563	if (nr_imm) {
564		vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
565		assert(vpc->imm);
566	}
567
568	return TRUE;
569}
570
571static void
572nv30_vertprog_translate(struct nv30_context *nv30,
573			struct nv30_vertex_program *vp)
574{
575	struct tgsi_parse_context parse;
576	struct nv30_vpc *vpc = NULL;
577
578	tgsi_dump(vp->pipe.tokens,0);
579
580	vpc = CALLOC(1, sizeof(struct nv30_vpc));
581	if (!vpc)
582		return;
583	vpc->vp = vp;
584	vpc->high_temp = -1;
585
586	if (!nv30_vertprog_prepare(vpc)) {
587		FREE(vpc);
588		return;
589	}
590
591	tgsi_parse_init(&parse, vp->pipe.tokens);
592
593	while (!tgsi_parse_end_of_tokens(&parse)) {
594		tgsi_parse_token(&parse);
595
596		switch (parse.FullToken.Token.Type) {
597		case TGSI_TOKEN_TYPE_DECLARATION:
598		{
599			const struct tgsi_full_declaration *fdec;
600			fdec = &parse.FullToken.FullDeclaration;
601			switch (fdec->Declaration.File) {
602			case TGSI_FILE_OUTPUT:
603				if (!nv30_vertprog_parse_decl_output(vpc, fdec))
604					goto out_err;
605				break;
606			default:
607				break;
608			}
609		}
610			break;
611		case TGSI_TOKEN_TYPE_IMMEDIATE:
612		{
613			const struct tgsi_full_immediate *imm;
614
615			imm = &parse.FullToken.FullImmediate;
616			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
617			assert(imm->Immediate.NrTokens == 4 + 1);
618			vpc->imm[vpc->nr_imm++] =
619				constant(vpc, -1,
620					 imm->u[0].Float,
621					 imm->u[1].Float,
622					 imm->u[2].Float,
623					 imm->u[3].Float);
624		}
625			break;
626		case TGSI_TOKEN_TYPE_INSTRUCTION:
627		{
628			const struct tgsi_full_instruction *finst;
629			finst = &parse.FullToken.FullInstruction;
630			if (!nv30_vertprog_parse_instruction(vpc, finst))
631				goto out_err;
632		}
633			break;
634		default:
635			break;
636		}
637	}
638
639	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
640	vp->translated = TRUE;
641out_err:
642	tgsi_parse_free(&parse);
643	FREE(vpc);
644}
645
646static boolean
647nv30_vertprog_validate(struct nv30_context *nv30)
648{
649	struct pipe_screen *pscreen = nv30->pipe.screen;
650	struct nouveau_grobj *rankine = nv30->screen->rankine;
651	struct nv30_vertex_program *vp;
652	struct pipe_buffer *constbuf;
653	boolean upload_code = FALSE, upload_data = FALSE;
654	int i;
655
656	vp = nv30->vertprog;
657	constbuf = nv30->constbuf[PIPE_SHADER_VERTEX];
658
659	/* Translate TGSI shader into hw bytecode */
660	if (!vp->translated) {
661		nv30_vertprog_translate(nv30, vp);
662		if (!vp->translated)
663			return FALSE;
664	}
665
666	/* Allocate hw vtxprog exec slots */
667	if (!vp->exec) {
668		struct nouveau_resource *heap = nv30->screen->vp_exec_heap;
669		struct nouveau_stateobj *so;
670		uint vplen = vp->nr_insns;
671
672		if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
673			while (heap->next && heap->size < vplen) {
674				struct nv30_vertex_program *evict;
675
676				evict = heap->next->priv;
677				nouveau_resource_free(&evict->exec);
678			}
679
680			if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
681				assert(0);
682		}
683
684		so = so_new(2, 0);
685		so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
686		so_data  (so, vp->exec->start);
687		so_ref(so, &vp->so);
688		so_ref(NULL, &so);
689
690		upload_code = TRUE;
691	}
692
693	/* Allocate hw vtxprog const slots */
694	if (vp->nr_consts && !vp->data) {
695		struct nouveau_resource *heap = nv30->screen->vp_data_heap;
696
697		if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
698			while (heap->next && heap->size < vp->nr_consts) {
699				struct nv30_vertex_program *evict;
700
701				evict = heap->next->priv;
702				nouveau_resource_free(&evict->data);
703			}
704
705			if (nouveau_resource_alloc(heap, vp->nr_consts, vp,
706						   &vp->data))
707				assert(0);
708		}
709
710		/*XXX: handle this some day */
711		assert(vp->data->start >= vp->data_start_min);
712
713		upload_data = TRUE;
714		if (vp->data_start != vp->data->start)
715			upload_code = TRUE;
716	}
717
718	/* If exec or data segments moved we need to patch the program to
719	 * fixup offsets and register IDs.
720	 */
721	if (vp->exec_start != vp->exec->start) {
722		for (i = 0; i < vp->nr_insns; i++) {
723			struct nv30_vertex_program_exec *vpi = &vp->insns[i];
724
725			if (vpi->has_branch_offset) {
726				assert(0);
727			}
728		}
729
730		vp->exec_start = vp->exec->start;
731	}
732
733	if (vp->nr_consts && vp->data_start != vp->data->start) {
734		for (i = 0; i < vp->nr_insns; i++) {
735			struct nv30_vertex_program_exec *vpi = &vp->insns[i];
736
737			if (vpi->const_index >= 0) {
738				vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
739				vpi->data[1] |=
740					(vpi->const_index + vp->data->start) <<
741					NV30_VP_INST_CONST_SRC_SHIFT;
742
743			}
744		}
745
746		vp->data_start = vp->data->start;
747	}
748
749	/* Update + Upload constant values */
750	if (vp->nr_consts) {
751		float *map = NULL;
752
753		if (constbuf) {
754			map = pipe_buffer_map(pscreen, constbuf,
755					      PIPE_BUFFER_USAGE_CPU_READ);
756		}
757
758		for (i = 0; i < vp->nr_consts; i++) {
759			struct nv30_vertex_program_data *vpd = &vp->consts[i];
760
761			if (vpd->index >= 0) {
762				if (!upload_data &&
763				    !memcmp(vpd->value, &map[vpd->index * 4],
764					    4 * sizeof(float)))
765					continue;
766				memcpy(vpd->value, &map[vpd->index * 4],
767				       4 * sizeof(float));
768			}
769
770			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
771			OUT_RING  (i + vp->data->start);
772			OUT_RINGp ((uint32_t *)vpd->value, 4);
773		}
774
775		if (constbuf)
776			pipe_buffer_unmap(pscreen, constbuf);
777	}
778
779	/* Upload vtxprog */
780	if (upload_code) {
781#if 0
782		for (i = 0; i < vp->nr_insns; i++) {
783			NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
784				i, vp->insns[i].data[0], vp->insns[i].data[1],
785				vp->insns[i].data[2], vp->insns[i].data[3]);
786		}
787#endif
788		BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
789		OUT_RING  (vp->exec->start);
790		for (i = 0; i < vp->nr_insns; i++) {
791			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
792			OUT_RINGp (vp->insns[i].data, 4);
793		}
794	}
795
796	if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) {
797		so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]);
798		return TRUE;
799	}
800
801	return FALSE;
802}
803
804void
805nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
806{
807	vp->translated = FALSE;
808
809	if (vp->nr_insns) {
810		FREE(vp->insns);
811		vp->insns = NULL;
812		vp->nr_insns = 0;
813	}
814
815	if (vp->nr_consts) {
816		FREE(vp->consts);
817		vp->consts = NULL;
818		vp->nr_consts = 0;
819	}
820
821	nouveau_resource_free(&vp->exec);
822	vp->exec_start = 0;
823	nouveau_resource_free(&vp->data);
824	vp->data_start = 0;
825	vp->data_start_min = 0;
826
827	vp->ir = vp->or = 0;
828	so_ref(NULL, &vp->so);
829}
830
831struct nv30_state_entry nv30_state_vertprog = {
832	.validate = nv30_vertprog_validate,
833	.dirty = {
834		.pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
835		.hw = NV30_STATE_VERTPROG,
836	}
837};
838