r200_vertprog.c revision 122629f27925a9dc50029bebc5079f87f416a7e1
1/**************************************************************************
2
3Copyright (C) 2005 Aapo Tahkola.
4
5All Rights Reserved.
6
7Permission is hereby granted, free of charge, to any person obtaining a
8copy of this software and associated documentation files (the "Software"),
9to deal in the Software without restriction, including without limitation
10on the rights to use, copy, modify, merge, publish, distribute, sub
11license, and/or sell copies of the Software, and to permit persons to whom
12the Software is furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice (including the next
15paragraph) shall be included in all copies or substantial portions of the
16Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26**************************************************************************/
27
28/*
29 * Authors:
30 *   Aapo Tahkola <aet@rasterburn.org>
31 */
32#include "glheader.h"
33#include "macros.h"
34#include "enums.h"
35#include "program.h"
36
37#include "r200_context.h"
38#include "r200_vertprog.h"
39#include "r200_ioctl.h"
40#include "r200_tcl.h"
41#include "program_instruction.h"
42#include "tnl/tnl.h"
43
44#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
45    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
46    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
47    SWIZZLE_W != VSF_IN_COMPONENT_W || \
48    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
49    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
50    WRITEMASK_X != VSF_FLAG_X || \
51    WRITEMASK_Y != VSF_FLAG_Y || \
52    WRITEMASK_Z != VSF_FLAG_Z || \
53    WRITEMASK_W != VSF_FLAG_W
54#error Cannot change these!
55#endif
56
57#define SCALAR_FLAG (1<<31)
58#define FLAG_MASK (1<<31)
59#define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
60#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
61
62static struct{
63   char *name;
64   int opcode;
65   unsigned long ip; /* number of input operands and flags */
66}op_names[]={
67   OPN(ABS, 1),
68   OPN(ADD, 2),
69   OPN(ARL, 1|SCALAR_FLAG),
70   OPN(DP3, 2),
71   OPN(DP4, 2),
72   OPN(DPH, 2),
73   OPN(DST, 2),
74   OPN(EX2, 1|SCALAR_FLAG),
75   OPN(EXP, 1|SCALAR_FLAG),
76   OPN(FLR, 1),
77   OPN(FRC, 1),
78   OPN(LG2, 1|SCALAR_FLAG),
79   OPN(LIT, 1),
80   OPN(LOG, 1|SCALAR_FLAG),
81   OPN(MAD, 3),
82   OPN(MAX, 2),
83   OPN(MIN, 2),
84   OPN(MOV, 1),
85   OPN(MUL, 2),
86   OPN(POW, 2|SCALAR_FLAG),
87   OPN(RCP, 1|SCALAR_FLAG),
88   OPN(RSQ, 1|SCALAR_FLAG),
89   OPN(SGE, 2),
90   OPN(SLT, 2),
91   OPN(SUB, 2),
92   OPN(SWZ, 1),
93   OPN(XPD, 2),
94   OPN(PRINT, 0),
95   OPN(END, 0),
96};
97#undef OPN
98
99static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
100{
101   r200ContextPtr rmesa = R200_CONTEXT( ctx );
102   GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
103   int pi;
104   struct gl_vertex_program *mesa_vp = (void *)vp;
105   struct gl_program_parameter_list *paramList;
106   drm_radeon_cmd_header_t tmp;
107
108   R200_STATECHANGE( rmesa, vpp[0] );
109   R200_STATECHANGE( rmesa, vpp[1] );
110   assert(mesa_vp->Base.Parameters);
111   _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
112   paramList = mesa_vp->Base.Parameters;
113
114   if(paramList->NumParameters > R200_VSF_MAX_PARAM){
115      fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
116      return GL_FALSE;
117   }
118
119   for(pi = 0; pi < paramList->NumParameters; pi++) {
120      switch(paramList->Parameters[pi].Type) {
121      case PROGRAM_STATE_VAR:
122      case PROGRAM_NAMED_PARAM:
123      //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
124      case PROGRAM_CONSTANT:
125	 *fcmd++ = paramList->ParameterValues[pi][0];
126	 *fcmd++ = paramList->ParameterValues[pi][1];
127	 *fcmd++ = paramList->ParameterValues[pi][2];
128	 *fcmd++ = paramList->ParameterValues[pi][3];
129	 break;
130      default:
131	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
132	 break;
133      }
134      if (pi == 95) {
135	 fcmd = (GLfloat *)rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
136      }
137   }
138   /* hack up the cmd_size so not the whole state atom is emitted always. */
139   rmesa->hw.vpp[0].cmd_size =
140      1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
141   tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
142   tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
143   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
144   if (paramList->NumParameters > 96) {
145      rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
146      tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
147      tmp.veclinear.count = paramList->NumParameters - 96;
148      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
149   }
150   return GL_TRUE;
151}
152
153static __inline unsigned long t_dst_mask(GLuint mask)
154{
155   /* WRITEMASK_* is equivalent to VSF_FLAG_* */
156   return mask & VSF_FLAG_ALL;
157}
158
159static unsigned long t_dst(struct prog_dst_register *dst)
160{
161   switch(dst->File) {
162   case PROGRAM_TEMPORARY:
163      return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
164	 | R200_VSF_OUT_CLASS_TMP);
165   case PROGRAM_OUTPUT:
166      switch (dst->Index) {
167      case VERT_RESULT_HPOS:
168	 return R200_VSF_OUT_CLASS_RESULT_POS;
169      case VERT_RESULT_COL0:
170	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
171      case VERT_RESULT_COL1:
172	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
173	    | R200_VSF_OUT_CLASS_RESULT_COLOR);
174      case VERT_RESULT_FOGC:
175	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
176      case VERT_RESULT_TEX0:
177      case VERT_RESULT_TEX1:
178      case VERT_RESULT_TEX2:
179      case VERT_RESULT_TEX3:
180      case VERT_RESULT_TEX4:
181      case VERT_RESULT_TEX5:
182	 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
183	    | R200_VSF_OUT_CLASS_RESULT_TEXC);
184      case VERT_RESULT_PSIZ:
185	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
186      default:
187	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
188	 exit(0);
189	 return 0;
190      }
191   case PROGRAM_ADDRESS:
192      assert (dst->Index == 0);
193      return R200_VSF_OUT_CLASS_ADDR;
194   default:
195      fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
196      exit(0);
197      return 0;
198   }
199}
200
201static unsigned long t_src_class(enum register_file file)
202{
203
204   switch(file){
205   case PROGRAM_TEMPORARY:
206      return VSF_IN_CLASS_TMP;
207
208   case PROGRAM_INPUT:
209      return VSF_IN_CLASS_ATTR;
210
211   case PROGRAM_LOCAL_PARAM:
212   case PROGRAM_ENV_PARAM:
213   case PROGRAM_NAMED_PARAM:
214   case PROGRAM_STATE_VAR:
215      return VSF_IN_CLASS_PARAM;
216   /*
217   case PROGRAM_OUTPUT:
218   case PROGRAM_WRITE_ONLY:
219   case PROGRAM_ADDRESS:
220   */
221   default:
222      fprintf(stderr, "problem in %s", __FUNCTION__);
223      exit(0);
224   }
225}
226
227static __inline unsigned long t_swizzle(GLubyte swizzle)
228{
229/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
230   return swizzle;
231}
232
#if 0
/* Debug helper (compiled out): print every entry of vp->inputs[] to stderr
 * on one line, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
   }
   fprintf(stderr, ">\n");
}
#endif
250
251static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
252{
253/*
254   int i;
255   int max_reg = -1;
256*/
257   if(src->File == PROGRAM_INPUT){
258/*      if(vp->inputs[src->Index] != -1)
259	 return vp->inputs[src->Index];
260
261      for(i=0; i < VERT_ATTRIB_MAX; i++)
262	 if(vp->inputs[i] > max_reg)
263	    max_reg = vp->inputs[i];
264
265      vp->inputs[src->Index] = max_reg+1;*/
266
267      //vp_dump_inputs(vp, __FUNCTION__);
268      assert(vp->inputs[src->Index] != -1);
269      return vp->inputs[src->Index];
270   } else {
271      if (src->Index < 0) {
272	 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
273	 return 0;
274      }
275      return src->Index;
276   }
277}
278
279static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
280{
281
282   return MAKE_VSF_SOURCE(t_src_index(vp, src),
283			t_swizzle(GET_SWZ(src->Swizzle, 0)),
284			t_swizzle(GET_SWZ(src->Swizzle, 1)),
285			t_swizzle(GET_SWZ(src->Swizzle, 2)),
286			t_swizzle(GET_SWZ(src->Swizzle, 3)),
287			t_src_class(src->File),
288			src->NegateBase) | (src->RelAddr << 4);
289}
290
291static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
292{
293
294   return MAKE_VSF_SOURCE(t_src_index(vp, src),
295			t_swizzle(GET_SWZ(src->Swizzle, 0)),
296			t_swizzle(GET_SWZ(src->Swizzle, 0)),
297			t_swizzle(GET_SWZ(src->Swizzle, 0)),
298			t_swizzle(GET_SWZ(src->Swizzle, 0)),
299			t_src_class(src->File),
300			src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
301}
302
303static unsigned long t_opcode(enum prog_opcode opcode)
304{
305
306   switch(opcode){
307   case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
308   /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
309    * seems to ignore neg offsets which isn't quite correct...
310    */
311   case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
312   case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
313   case OPCODE_DST: return R200_VPI_OUT_OP_DST;
314   case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
315   case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
316   case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
317   case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
318   case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
319   case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
320   case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
321   case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
322   case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
323   case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
324   case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
325   case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
326   case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
327
328   default:
329      fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
330   }
331   exit(-1);
332   return 0;
333}
334
335static unsigned long op_operands(enum prog_opcode opcode)
336{
337   int i;
338
339   /* Can we trust mesas opcodes to be in order ? */
340   for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
341      if(op_names[i].opcode == opcode)
342	 return op_names[i].ip;
343
344   fprintf(stderr, "op %d not found in op_names\n", opcode);
345   exit(-1);
346   return 0;
347}
348
/* TODO: Get rid of t_src_class call */
/* Nonzero when sources a and b name two different registers (RelAddr or
 * Index differ) that are both of class PARAM or both of class ATTR.
 * The translator copies one of the two into a temporary when this holds;
 * presumably the hardware can read only one such register per
 * instruction - confirm against hardware documentation.
 * Arguments are parenthesized so any lvalue expression may be passed, and
 * the definition ends without a trailing line continuation. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
355
356/* fglrx on rv250 codes up unused sources as follows:
357   unused but necessary sources are same as previous source, zero-ed out.
358   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
359   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
360   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
361
/* Simplified helpers for filling in source slots an instruction does not
   really use. They read an already written source word of the instruction
   under construction (o_inst), so they must not be applied to a source that
   has not been set up yet.
   They are NOT semantically equivalent to the r300 ones; sharing code with
   r300 requires code changes. */

/* SELECT_ZERO placed in each of the X, Y, Z and W selector positions. */
#define VP_SRC_SELECT_ALL_ZERO \
   ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* Copy of source word `reg` with the 12 bits starting at
   R200_VPI_IN_X_SHIFT cleared and every component selector set to ZERO. */
#define VP_ZERO_SRC(reg) \
   ((((reg) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | (VP_SRC_SELECT_ALL_ZERO)))

/* Copy of source word `reg` with its low four bits replaced by 9
   (presumably the VSF_IN_CLASS_NONE encoding - confirm against the header). */
#define VP_UNUSED_SRC(reg) (((reg) & ~15) | 9)

#define ZERO_SRC_0 VP_ZERO_SRC(o_inst->src0)

#define ZERO_SRC_1 VP_ZERO_SRC(o_inst->src1)

#define ZERO_SRC_2 VP_ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 VP_UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 VP_UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 VP_UNUSED_SRC(o_inst->src2)
387
388
389/* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
390#define PREFER_DP4
391
392static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp)
393{
394   struct gl_vertex_program *mesa_vp = (void *)vp;
395   struct prog_instruction *vpi;
396   int i;
397   VERTEX_SHADER_INSTRUCTION *o_inst;
398   unsigned long operands;
399   int are_srcs_scalar;
400   unsigned long hw_op;
401
402   vp->native = GL_FALSE;
403
404   if ((mesa_vp->Base.InputsRead &
405      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
406      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
407      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
408      if (R200_DEBUG & DEBUG_FALLBACKS) {
409	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
410	    mesa_vp->Base.InputsRead);
411      }
412      return GL_FALSE;
413   }
414
415   if (mesa_vp->IsNVProgram) {
416   /* subtle differences in spec like guaranteed initialized regs could cause
417      headaches. Might want to remove the driconf option to enable it completely */
418      return GL_FALSE;
419   }
420   /* Initial value should be last tmp reg that hw supports.
421      Strangely enough r300 doesnt mind even though these would be out of range.
422      Smart enough to realize that it doesnt need it? */
423   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
424   struct prog_src_register src[3];
425
426/*   if (getenv("R300_VP_SAFETY")) {
427      WARN_ONCE("R300_VP_SAFETY enabled.\n");
428
429      vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
430      memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
431
432      for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
433	 vpi[i].Opcode = OPCODE_MOV;
434	 vpi[i].StringPos = 0;
435	 vpi[i].Data = 0;
436
437	 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
438	 vpi[i].DstReg.Index = i;
439	 vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
440	 vpi[i].DstReg.CondMask = COND_TR;
441
442	 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
443	 vpi[i].SrcReg[0].Index = 0;
444	 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
445      }
446
447      memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
448
449      free(mesa_vp->Base.Instructions);
450
451      mesa_vp->Base.Instructions = vpi;
452
453      mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
454      vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
455
456      assert(vpi->Opcode == OPCODE_END);
457   }*/
458/* FIXME: is changing the prog safe to do here? */
459   if (mesa_vp->IsPositionInvariant) {
460      struct gl_program_parameter_list *paramList;
461      GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
462
463#ifdef PREFER_DP4
464      tokens[5] = STATE_MATRIX;
465#else
466      tokens[5] = STATE_MATRIX_TRANSPOSE;
467#endif
468      paramList = mesa_vp->Base.Parameters;
469
470      vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
471      memset(vpi, 0, 4 * sizeof(struct prog_instruction));
472
473      for (i=0; i < 4; i++) {
474	 GLint idx;
475	 tokens[3] = tokens[4] = i;
476	 idx = _mesa_add_state_reference(paramList, tokens);
477#ifdef PREFER_DP4
478	 vpi[i].Opcode = OPCODE_DP4;
479	 vpi[i].StringPos = 0;
480	 vpi[i].Data = 0;
481
482	 vpi[i].DstReg.File = PROGRAM_OUTPUT;
483	 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
484	 vpi[i].DstReg.WriteMask = 1 << i;
485	 vpi[i].DstReg.CondMask = COND_TR;
486
487	 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
488	 vpi[i].SrcReg[0].Index = idx;
489	 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
490
491	 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
492	 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
493	 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
494#else
495	 if (i == 0)
496	    vpi[i].Opcode = OPCODE_MUL;
497	 else
498	    vpi[i].Opcode = OPCODE_MAD;
499
500	 vpi[i].StringPos = 0;
501	 vpi[i].Data = 0;
502
503	 if (i == 3)
504	    vpi[i].DstReg.File = PROGRAM_OUTPUT;
505	 else
506	    vpi[i].DstReg.File = PROGRAM_TEMPORARY;
507	 vpi[i].DstReg.Index = 0;
508	 vpi[i].DstReg.WriteMask = 0xf;
509	 vpi[i].DstReg.CondMask = COND_TR;
510
511	 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
512	 vpi[i].SrcReg[0].Index = idx;
513	 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
514
515	 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
516	 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
517	 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
518
519	 if (i > 0) {
520	    vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
521	    vpi[i].SrcReg[2].Index = 0;
522	    vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
523	 }
524#endif
525      }
526
527      memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
528
529      free(mesa_vp->Base.Instructions);
530
531      mesa_vp->Base.Instructions = vpi;
532
533      mesa_vp->Base.NumInstructions += 4;
534      vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
535
536      assert(vpi->Opcode == OPCODE_END);
537
538      mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
539      mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
540
541      //fprintf(stderr, "IsPositionInvariant is set!\n");
542      //_mesa_print_program(&mesa_vp->Base);
543   }
544
545   vp->pos_end = 0;
546   mesa_vp->Base.NumNativeInstructions = 0;
547   mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
548
549   for(i=0; i < VERT_ATTRIB_MAX; i++)
550      vp->inputs[i] = -1;
551/* fglrx uses fixed inputs as follows for conventional attribs.
552   generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
553   There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
554   attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
555   vertex normal/weight)
556   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
557   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
558   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
559   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
560   generic attribs would require some more work (dma regions, renaming). */
561
562/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
563   vp->inputs[VERT_ATTRIB_POS] = 0;
564   vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
565   vp->inputs[VERT_ATTRIB_NORMAL] = 1;
566   vp->inputs[VERT_ATTRIB_COLOR0] = 2;
567   vp->inputs[VERT_ATTRIB_COLOR1] = 3;
568   vp->inputs[VERT_ATTRIB_FOG] = 15;
569   vp->inputs[VERT_ATTRIB_TEX0] = 6;
570   vp->inputs[VERT_ATTRIB_TEX1] = 7;
571   vp->inputs[VERT_ATTRIB_TEX2] = 8;
572   vp->inputs[VERT_ATTRIB_TEX3] = 9;
573   vp->inputs[VERT_ATTRIB_TEX4] = 10;
574   vp->inputs[VERT_ATTRIB_TEX5] = 11;
575/* attr 4,5 and 13 are only used with generic attribs.
576   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
577   not possibe to use with vertex progs as it is lacking in vert prog specification) */
578
579   assert(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS));
580
581   vp->translated = GL_TRUE;
582
583   o_inst = vp->instr;
584   for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
585      if (u_temp_i < mesa_vp->Base.NumTemporaries) {
586	 if (R200_DEBUG & DEBUG_FALLBACKS) {
587	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
588	 }
589	 return GL_FALSE;
590      }
591      u_temp_i = R200_VSF_MAX_TEMPS - 1;
592      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
593	 mesa_vp->Base.NumNativeInstructions = 129;
594	 if (R200_DEBUG & DEBUG_FALLBACKS) {
595	    fprintf(stderr, "more than 128 native instructions\n");
596	 }
597	 return GL_FALSE;
598      }
599
600      operands = op_operands(vpi->Opcode);
601      are_srcs_scalar = operands & SCALAR_FLAG;
602      operands &= OP_MASK;
603
604      for(i = 0; i < operands; i++)
605	 src[i] = vpi->SrcReg[i];
606
607      if(operands == 3){
608	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
609	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
610		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
611		VSF_FLAG_ALL);
612
613	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
614		  SWIZZLE_X, SWIZZLE_Y,
615		  SWIZZLE_Z, SWIZZLE_W,
616		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
617
618	    o_inst->src1 = ZERO_SRC_0;
619	    o_inst->src2 = UNUSED_SRC_1;
620	    o_inst++;
621
622	    src[2].File = PROGRAM_TEMPORARY;
623	    src[2].Index = u_temp_i;
624	    src[2].RelAddr = 0;
625	    u_temp_i--;
626	 }
627      }
628
629      if(operands >= 2){
630	 if( CMP_SRCS(src[1], src[0]) ){
631	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
632		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
633		VSF_FLAG_ALL);
634
635	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
636		  SWIZZLE_X, SWIZZLE_Y,
637		  SWIZZLE_Z, SWIZZLE_W,
638		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
639
640	    o_inst->src1 = ZERO_SRC_0;
641	    o_inst->src2 = UNUSED_SRC_1;
642	    o_inst++;
643
644	    src[0].File = PROGRAM_TEMPORARY;
645	    src[0].Index = u_temp_i;
646	    src[0].RelAddr = 0;
647	    u_temp_i--;
648	 }
649      }
650
651      /* These ops need special handling. */
652      switch(vpi->Opcode){
653      case OPCODE_POW:
654/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
655   So may need to insert additional instruction */
656	 if ((src[0].File == src[1].File) &&
657	     (src[0].Index == src[1].Index)) {
658	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
659		   t_dst_mask(vpi->DstReg.WriteMask));
660	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
661		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
662		   SWIZZLE_ZERO,
663		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
664		   SWIZZLE_ZERO,
665		   t_src_class(src[0].File),
666		   src[0].NegateBase) | (src[0].RelAddr << 4);
667	    o_inst->src1 = UNUSED_SRC_0;
668	    o_inst->src2 = UNUSED_SRC_0;
669	 }
670	 else {
671	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
672		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
673		   VSF_FLAG_ALL);
674	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
675		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
676		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
677		   t_src_class(src[0].File),
678		   src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
679	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
680		   SWIZZLE_ZERO, SWIZZLE_ZERO,
681		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
682		   t_src_class(src[1].File),
683		   src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
684	    o_inst->src2 = UNUSED_SRC_1;
685	    o_inst++;
686
687	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
688		   t_dst_mask(vpi->DstReg.WriteMask));
689	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
690		   VSF_IN_COMPONENT_X,
691		   VSF_IN_COMPONENT_Y,
692		   VSF_IN_COMPONENT_Z,
693		   VSF_IN_COMPONENT_W,
694		   VSF_IN_CLASS_TMP,
695		   VSF_FLAG_NONE);
696	    o_inst->src1 = UNUSED_SRC_0;
697	    o_inst->src2 = UNUSED_SRC_0;
698	    u_temp_i--;
699	 }
700	 goto next;
701
702      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
703      case OPCODE_SWZ:
704	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
705		t_dst_mask(vpi->DstReg.WriteMask));
706	 o_inst->src0 = t_src(vp, &src[0]);
707	 o_inst->src1 = ZERO_SRC_0;
708	 o_inst->src2 = UNUSED_SRC_1;
709	 goto next;
710
711      case OPCODE_MAD:
712	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
713	    src[1].File == PROGRAM_TEMPORARY &&
714	    src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
715
716	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&vpi->DstReg),
717	    t_dst_mask(vpi->DstReg.WriteMask));
718	 o_inst->src0 = t_src(vp, &src[0]);
719#if 0
720if ((o_inst - vp->instr) == 31) {
721/* fix up the broken vertex program of quake4 demo... */
722o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
723			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
724			t_src_class(src[1].File),
725			src[1].NegateBase) | (src[1].RelAddr << 4);
726o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
727			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
728			t_src_class(src[1].File),
729			src[1].NegateBase) | (src[1].RelAddr << 4);
730}
731else {
732	 o_inst->src1 = t_src(vp, &src[1]);
733	 o_inst->src2 = t_src(vp, &src[2]);
734}
735#else
736	 o_inst->src1 = t_src(vp, &src[1]);
737	 o_inst->src2 = t_src(vp, &src[2]);
738#endif
739	 goto next;
740
741      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
742	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
743		t_dst_mask(vpi->DstReg.WriteMask));
744
745	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
746		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
747		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
748		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
749		SWIZZLE_ZERO,
750		t_src_class(src[0].File),
751		src[0].NegateBase) | (src[0].RelAddr << 4);
752
753	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
754		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
755		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
756		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
757		SWIZZLE_ZERO,
758		t_src_class(src[1].File),
759		src[1].NegateBase) | (src[1].RelAddr << 4);
760
761	 o_inst->src2 = UNUSED_SRC_1;
762	 goto next;
763
764      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
765	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
766		t_dst_mask(vpi->DstReg.WriteMask));
767
768	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
769		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
770		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
771		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
772		VSF_IN_COMPONENT_ONE,
773		t_src_class(src[0].File),
774		src[0].NegateBase) | (src[0].RelAddr << 4);
775	 o_inst->src1 = t_src(vp, &src[1]);
776	 o_inst->src2 = UNUSED_SRC_1;
777	 goto next;
778
779      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
780	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
781		t_dst_mask(vpi->DstReg.WriteMask));
782
783	 o_inst->src0 = t_src(vp, &src[0]);
784	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
785		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
786		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
787		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
788		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
789		t_src_class(src[1].File),
790		(!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
791	 o_inst->src2 = UNUSED_SRC_1;
792	 goto next;
793
794      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
795	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&vpi->DstReg),
796		t_dst_mask(vpi->DstReg.WriteMask));
797
798	 o_inst->src0=t_src(vp, &src[0]);
799	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
800		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
801		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
802		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
803		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
804		t_src_class(src[0].File),
805		(!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
806	 o_inst->src2 = UNUSED_SRC_1;
807	 goto next;
808
809      case OPCODE_FLR:
810      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
811         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
812
813	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
814	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
815	    t_dst_mask(vpi->DstReg.WriteMask));
816
817	 o_inst->src0 = t_src(vp, &src[0]);
818	 o_inst->src1 = UNUSED_SRC_0;
819	 o_inst->src2 = UNUSED_SRC_1;
820	 o_inst++;
821
822	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
823		t_dst_mask(vpi->DstReg.WriteMask));
824
825	 o_inst->src0 = t_src(vp, &src[0]);
826	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
827		VSF_IN_COMPONENT_X,
828		VSF_IN_COMPONENT_Y,
829		VSF_IN_COMPONENT_Z,
830		VSF_IN_COMPONENT_W,
831		VSF_IN_CLASS_TMP,
832		/* Not 100% sure about this */
833		(!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
834
835	 o_inst->src2 = UNUSED_SRC_0;
836	 u_temp_i--;
837	 goto next;
838
839      case OPCODE_XPD:
840	 /* mul r0, r1.yzxw, r2.zxyw
841	    mad r0, -r2.yzxw, r1.zxyw, r0
842	    NOTE: might need MAD_2
843	  */
844
845	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
846	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
847	    t_dst_mask(vpi->DstReg.WriteMask));
848
849	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
850		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
851		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
852		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
853		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
854		t_src_class(src[0].File),
855		src[0].NegateBase) | (src[0].RelAddr << 4);
856
857	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
858		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
859		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
860		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
861		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
862		t_src_class(src[1].File),
863		src[1].NegateBase) | (src[1].RelAddr << 4);
864
865	 o_inst->src2 = UNUSED_SRC_1;
866	 o_inst++;
867	 u_temp_i--;
868
869	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&vpi->DstReg),
870		t_dst_mask(vpi->DstReg.WriteMask));
871
872	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
873		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
874		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
875		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
876		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
877		t_src_class(src[1].File),
878		(!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
879
880	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
881		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
882		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
883		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
884		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
885		t_src_class(src[0].File),
886		src[0].NegateBase) | (src[0].RelAddr << 4);
887
888	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
889		VSF_IN_COMPONENT_X,
890		VSF_IN_COMPONENT_Y,
891		VSF_IN_COMPONENT_Z,
892		VSF_IN_COMPONENT_W,
893		VSF_IN_CLASS_TMP,
894		VSF_FLAG_NONE);
895	 goto next;
896
897      case OPCODE_END:
898	 break;
899      default:
900	 break;
901      }
902
903      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&vpi->DstReg),
904	    t_dst_mask(vpi->DstReg.WriteMask));
905
906      if(are_srcs_scalar){
907	 switch(operands){
908	    case 1:
909		o_inst->src0 = t_src_scalar(vp, &src[0]);
910		o_inst->src1 = UNUSED_SRC_0;
911		o_inst->src2 = UNUSED_SRC_1;
912	    break;
913
914	    case 2:
915		o_inst->src0 = t_src_scalar(vp, &src[0]);
916		o_inst->src1 = t_src_scalar(vp, &src[1]);
917		o_inst->src2 = UNUSED_SRC_1;
918	    break;
919
920	    case 3:
921		o_inst->src0 = t_src_scalar(vp, &src[0]);
922		o_inst->src1 = t_src_scalar(vp, &src[1]);
923		o_inst->src2 = t_src_scalar(vp, &src[2]);
924	    break;
925
926	    default:
927		fprintf(stderr, "illegal number of operands %lu\n", operands);
928		exit(-1);
929	    break;
930	 }
931      } else {
932	 switch(operands){
933	    case 1:
934		o_inst->src0 = t_src(vp, &src[0]);
935		o_inst->src1 = UNUSED_SRC_0;
936		o_inst->src2 = UNUSED_SRC_1;
937	    break;
938
939	    case 2:
940		o_inst->src0 = t_src(vp, &src[0]);
941		o_inst->src1 = t_src(vp, &src[1]);
942		o_inst->src2 = UNUSED_SRC_1;
943	    break;
944
945	    case 3:
946		o_inst->src0 = t_src(vp, &src[0]);
947		o_inst->src1 = t_src(vp, &src[1]);
948		o_inst->src2 = t_src(vp, &src[2]);
949	    break;
950
951	    default:
952		fprintf(stderr, "illegal number of operands %lu\n", operands);
953		exit(-1);
954	    break;
955	 }
956      }
957      next:
958      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
959	 vp->pos_end = (o_inst - vp->instr);
960      }
961   }
962
963   /* need to test again since some instructions require more than one (up to 3) native inst */
964   if(o_inst - vp->instr > R200_VSF_MAX_INST) {
965      mesa_vp->Base.NumNativeInstructions = 129;
966      if (R200_DEBUG & DEBUG_FALLBACKS) {
967	 fprintf(stderr, "more than 128 native instructions\n");
968      }
969      return GL_FALSE;
970   }
971   vp->native = GL_TRUE;
972   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
973#if 0
974   fprintf(stderr, "hw program:\n");
975   for(i=0; i < vp->program.length; i++)
976      fprintf(stderr, "%08x\n", vp->instr[i]);
977#endif
978   return GL_TRUE;
979}
980
981void r200SetupVertexProg( GLcontext *ctx ) {
982   r200ContextPtr rmesa = R200_CONTEXT(ctx);
983   struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
984   GLboolean fallback;
985   GLint i;
986
987   if (!vp->translated) {
988      rmesa->curr_vp_hw = NULL;
989      r200_translate_vertex_program(vp);
990   }
991   /* could optimize setting up vertex progs away for non-tcl hw */
992   fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
993      rmesa->r200Screen->drmSupportsVertexProgram);
994   TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
995   if (fallback) return;
996
997   R200_STATECHANGE( rmesa, pvs );
998
999   rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1000      ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1001      (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1002   rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1003      (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1004
1005   /* maybe user clip planes just work with vertex progs... untested */
1006   if (ctx->Transform.ClipPlanesEnabled) {
1007      R200_STATECHANGE( rmesa, tcl );
1008      if (vp->mesa_program.IsPositionInvariant) {
1009	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1010      }
1011      else {
1012	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1013      }
1014   }
1015
1016   if (vp != rmesa->curr_vp_hw) {
1017      GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1018      drm_radeon_cmd_header_t tmp;
1019
1020      R200_STATECHANGE( rmesa, vpi[0] );
1021      R200_STATECHANGE( rmesa, vpi[1] );
1022
1023      /* FIXME: what about using a memcopy... */
1024      for (i = 0; (i < 64) && i < count; i++) {
1025	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1026	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1027	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1028	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1029      }
1030      /* hack up the cmd_size so not the whole state atom is emitted always.
1031         This may require some more thought, we may emit half progs on lost state, but
1032         hopefully it won't matter?
1033         WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1034         packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1035      rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1036      tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1037      tmp.veclinear.count = (count > 64) ? 64 : count;
1038      rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1039      if (count > 64) {
1040	 for (i = 0; i < (count - 64); i++) {
1041	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1042	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1043	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1044	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1045	 }
1046	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1047	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1048	 tmp.veclinear.count = count - 64;
1049	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1050      }
1051      rmesa->curr_vp_hw = vp;
1052   }
1053}
1054
1055
1056static void
1057r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1058{
1059   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1060
1061   switch(target){
1062   case GL_VERTEX_PROGRAM_ARB:
1063      rmesa->curr_vp_hw = NULL;
1064      break;
1065   default:
1066      _mesa_problem(ctx, "Target not supported yet!");
1067      break;
1068   }
1069}
1070
1071static struct gl_program *
1072r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1073{
1074   struct r200_vertex_program *vp;
1075
1076   switch(target){
1077   case GL_VERTEX_PROGRAM_ARB:
1078      vp = CALLOC_STRUCT(r200_vertex_program);
1079      return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1080   case GL_FRAGMENT_PROGRAM_ARB:
1081   case GL_FRAGMENT_PROGRAM_NV:
1082      return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1083   default:
1084      _mesa_problem(ctx, "Bad target in r200NewProgram");
1085   }
1086   return NULL;
1087}
1088
1089
1090static void
1091r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1092{
1093   _mesa_delete_program(ctx, prog);
1094}
1095
1096static void
1097r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1098{
1099   struct r200_vertex_program *vp = (void *)prog;
1100
1101   switch(target) {
1102   case GL_VERTEX_PROGRAM_ARB:
1103      vp->translated = GL_FALSE;
1104      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));
1105      /*r200_translate_vertex_shader(vp);*/
1106      break;
1107   }
1108   /* need this for tcl fallbacks */
1109   _tnl_program_string(ctx, target, prog);
1110}
1111
1112static GLboolean
1113r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1114{
1115   struct r200_vertex_program *vp = (void *)prog;
1116
1117   switch(target){
1118   case GL_VERTEX_STATE_PROGRAM_NV:
1119   case GL_VERTEX_PROGRAM_ARB:
1120      if (!vp->translated) {
1121	 r200_translate_vertex_program(vp);
1122      }
1123     /* does not take parameters etc. into account */
1124      return vp->native;
1125   default:
1126      _mesa_problem(ctx, "Bad target in r200NewProgram");
1127   }
1128   return 0;
1129}
1130
1131void r200InitShaderFuncs(struct dd_function_table *functions)
1132{
1133   functions->NewProgram = r200NewProgram;
1134   functions->BindProgram = r200BindProgram;
1135   functions->DeleteProgram = r200DeleteProgram;
1136   functions->ProgramStringNotify = r200ProgramStringNotify;
1137   functions->IsProgramNative = r200IsProgramNative;
1138}
1139