r200_vertprog.c revision b4026d9be828bd0b6f60158456edf24994efb053
1/**************************************************************************
2
3Copyright (C) 2005 Aapo Tahkola.
4
5All Rights Reserved.
6
7Permission is hereby granted, free of charge, to any person obtaining a
8copy of this software and associated documentation files (the "Software"),
9to deal in the Software without restriction, including without limitation
10on the rights to use, copy, modify, merge, publish, distribute, sub
11license, and/or sell copies of the Software, and to permit persons to whom
12the Software is furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice (including the next
15paragraph) shall be included in all copies or substantial portions of the
16Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26**************************************************************************/
27
28/*
29 * Authors:
30 *   Aapo Tahkola <aet@rasterburn.org>
31 *   Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "shader/program.h"
37#include "shader/prog_instruction.h"
38#include "shader/prog_parameter.h"
39#include "shader/prog_statevars.h"
40#include "shader/programopt.h"
41#include "tnl/tnl.h"
42
43#include "r200_context.h"
44#include "r200_vertprog.h"
45#include "r200_ioctl.h"
46#include "r200_tcl.h"
47
48#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51    SWIZZLE_W != VSF_IN_COMPONENT_W || \
52    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54    WRITEMASK_X != VSF_FLAG_X || \
55    WRITEMASK_Y != VSF_FLAG_Y || \
56    WRITEMASK_Z != VSF_FLAG_Z || \
57    WRITEMASK_W != VSF_FLAG_W
58#error Cannot change these!
59#endif
60
/* Bit 31 of an op_names[].ip entry marks an opcode whose sources are scalar
 * (r200_translate_vertex_program tests it with "operands & SCALAR_FLAG").
 * The constant is unsigned on purpose: (1<<31) overflows a 32-bit signed int,
 * and the resulting negative value would sign-extend into the upper bits of
 * the unsigned long ip field on platforms where long is 64 bits wide. */
#define SCALAR_FLAG (1u<<31)
#define FLAG_MASK (1u<<31)
/* Low bits of ip: the number of source operands. */
#define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
/* Build one op_names[] entry: printable name, Mesa opcode, operand count/flags. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66static struct{
67   char *name;
68   int opcode;
69   unsigned long ip; /* number of input operands and flags */
70}op_names[]={
71   OPN(ABS, 1),
72   OPN(ADD, 2),
73   OPN(ARL, 1|SCALAR_FLAG),
74   OPN(DP3, 2),
75   OPN(DP4, 2),
76   OPN(DPH, 2),
77   OPN(DST, 2),
78   OPN(EX2, 1|SCALAR_FLAG),
79   OPN(EXP, 1|SCALAR_FLAG),
80   OPN(FLR, 1),
81   OPN(FRC, 1),
82   OPN(LG2, 1|SCALAR_FLAG),
83   OPN(LIT, 1),
84   OPN(LOG, 1|SCALAR_FLAG),
85   OPN(MAD, 3),
86   OPN(MAX, 2),
87   OPN(MIN, 2),
88   OPN(MOV, 1),
89   OPN(MUL, 2),
90   OPN(POW, 2|SCALAR_FLAG),
91   OPN(RCP, 1|SCALAR_FLAG),
92   OPN(RSQ, 1|SCALAR_FLAG),
93   OPN(SGE, 2),
94   OPN(SLT, 2),
95   OPN(SUB, 2),
96   OPN(SWZ, 1),
97   OPN(XPD, 2),
98   OPN(PRINT, 0),
99   OPN(END, 0),
100};
101#undef OPN
102
103static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
104{
105   r200ContextPtr rmesa = R200_CONTEXT( ctx );
106   GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
107   int pi;
108   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109   struct gl_program_parameter_list *paramList;
110   drm_radeon_cmd_header_t tmp;
111
112   R200_STATECHANGE( rmesa, vpp[0] );
113   R200_STATECHANGE( rmesa, vpp[1] );
114   assert(mesa_vp->Base.Parameters);
115   _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
116   paramList = mesa_vp->Base.Parameters;
117
118   if(paramList->NumParameters > R200_VSF_MAX_PARAM){
119      fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
120      return GL_FALSE;
121   }
122
123   for(pi = 0; pi < paramList->NumParameters; pi++) {
124      switch(paramList->Parameters[pi].Type) {
125      case PROGRAM_STATE_VAR:
126      case PROGRAM_NAMED_PARAM:
127      //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128      case PROGRAM_CONSTANT:
129	 *fcmd++ = paramList->ParameterValues[pi][0];
130	 *fcmd++ = paramList->ParameterValues[pi][1];
131	 *fcmd++ = paramList->ParameterValues[pi][2];
132	 *fcmd++ = paramList->ParameterValues[pi][3];
133	 break;
134      default:
135	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
136	 break;
137      }
138      if (pi == 95) {
139	 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
140      }
141   }
142   /* hack up the cmd_size so not the whole state atom is emitted always. */
143   rmesa->hw.vpp[0].cmd_size =
144      1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
145   tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
146   tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
147   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
148   if (paramList->NumParameters > 96) {
149      rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
150      tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
151      tmp.veclinear.count = paramList->NumParameters - 96;
152      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
153   }
154   return GL_TRUE;
155}
156
157static INLINE unsigned long t_dst_mask(GLuint mask)
158{
159   /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160   return mask & VSF_FLAG_ALL;
161}
162
163static unsigned long t_dst(struct prog_dst_register *dst)
164{
165   switch(dst->File) {
166   case PROGRAM_TEMPORARY:
167      return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
168	 | R200_VSF_OUT_CLASS_TMP);
169   case PROGRAM_OUTPUT:
170      switch (dst->Index) {
171      case VERT_RESULT_HPOS:
172	 return R200_VSF_OUT_CLASS_RESULT_POS;
173      case VERT_RESULT_COL0:
174	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
175      case VERT_RESULT_COL1:
176	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
177	    | R200_VSF_OUT_CLASS_RESULT_COLOR);
178      case VERT_RESULT_FOGC:
179	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
180      case VERT_RESULT_TEX0:
181      case VERT_RESULT_TEX1:
182      case VERT_RESULT_TEX2:
183      case VERT_RESULT_TEX3:
184      case VERT_RESULT_TEX4:
185      case VERT_RESULT_TEX5:
186	 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
187	    | R200_VSF_OUT_CLASS_RESULT_TEXC);
188      case VERT_RESULT_PSIZ:
189	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
190      default:
191	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
192	 exit(0);
193	 return 0;
194      }
195   case PROGRAM_ADDRESS:
196      assert (dst->Index == 0);
197      return R200_VSF_OUT_CLASS_ADDR;
198   default:
199      fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
200      exit(0);
201      return 0;
202   }
203}
204
205static unsigned long t_src_class(gl_register_file file)
206{
207
208   switch(file){
209   case PROGRAM_TEMPORARY:
210      return VSF_IN_CLASS_TMP;
211
212   case PROGRAM_INPUT:
213      return VSF_IN_CLASS_ATTR;
214
215   case PROGRAM_LOCAL_PARAM:
216   case PROGRAM_ENV_PARAM:
217   case PROGRAM_NAMED_PARAM:
218   case PROGRAM_CONSTANT:
219   case PROGRAM_STATE_VAR:
220      return VSF_IN_CLASS_PARAM;
221   /*
222   case PROGRAM_OUTPUT:
223   case PROGRAM_WRITE_ONLY:
224   case PROGRAM_ADDRESS:
225   */
226   default:
227      fprintf(stderr, "problem in %s", __FUNCTION__);
228      exit(0);
229   }
230}
231
232static INLINE unsigned long t_swizzle(GLubyte swizzle)
233{
234/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
235   return swizzle;
236}
237
#if 0
/* Debug helper (compiled out): print every entry of vp->inputs[] to stderr on
   one line, prefixed with the caller's name. Complains and returns if vp is
   NULL. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   attr = 0;
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
255
256static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
257{
258/*
259   int i;
260   int max_reg = -1;
261*/
262   if(src->File == PROGRAM_INPUT){
263/*      if(vp->inputs[src->Index] != -1)
264	 return vp->inputs[src->Index];
265
266      for(i=0; i < VERT_ATTRIB_MAX; i++)
267	 if(vp->inputs[i] > max_reg)
268	    max_reg = vp->inputs[i];
269
270      vp->inputs[src->Index] = max_reg+1;*/
271
272      //vp_dump_inputs(vp, __FUNCTION__);
273      assert(vp->inputs[src->Index] != -1);
274      return vp->inputs[src->Index];
275   } else {
276      if (src->Index < 0) {
277	 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
278	 return 0;
279      }
280      return src->Index;
281   }
282}
283
284static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
285{
286
287   return MAKE_VSF_SOURCE(t_src_index(vp, src),
288			t_swizzle(GET_SWZ(src->Swizzle, 0)),
289			t_swizzle(GET_SWZ(src->Swizzle, 1)),
290			t_swizzle(GET_SWZ(src->Swizzle, 2)),
291			t_swizzle(GET_SWZ(src->Swizzle, 3)),
292			t_src_class(src->File),
293			src->NegateBase) | (src->RelAddr << 4);
294}
295
296static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
297{
298
299   return MAKE_VSF_SOURCE(t_src_index(vp, src),
300			t_swizzle(GET_SWZ(src->Swizzle, 0)),
301			t_swizzle(GET_SWZ(src->Swizzle, 0)),
302			t_swizzle(GET_SWZ(src->Swizzle, 0)),
303			t_swizzle(GET_SWZ(src->Swizzle, 0)),
304			t_src_class(src->File),
305			src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
306}
307
308static unsigned long t_opcode(enum prog_opcode opcode)
309{
310
311   switch(opcode){
312   case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
313   /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
314    * seems to ignore neg offsets which isn't quite correct...
315    */
316   case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
317   case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
318   case OPCODE_DST: return R200_VPI_OUT_OP_DST;
319   case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
320   case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
321   case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
322   case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
323   case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
324   case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
325   case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
326   case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
327   case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
328   case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
329   case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
330   case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
331   case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
332
333   default:
334      fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
335   }
336   exit(-1);
337   return 0;
338}
339
340static unsigned long op_operands(enum prog_opcode opcode)
341{
342   int i;
343
344   /* Can we trust mesas opcodes to be in order ? */
345   for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
346      if(op_names[i].opcode == opcode)
347	 return op_names[i].ip;
348
349   fprintf(stderr, "op %d not found in op_names\n", opcode);
350   exit(-1);
351   return 0;
352}
353
/* TODO: Get rid of t_src_class call */
/* True when sources a and b differ in RelAddr or Index while both are of
   class PARAM, or both are of class ATTR. r200_translate_vertex_program uses
   this to decide when one of the two sources must first be copied into a
   temporary. The arguments are parenthesized so that any lvalue expression
   can be passed, and are evaluated more than once. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are the same as the previous source, zeroed out.
   unnecessary sources are the same as the previous source but with
   VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zeroed out, and
   its 3rd arg set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* Use these simpler definitions. They must obviously not be used with source
   registers that have not been set up yet, and they implicitly read the local
   variable o_inst of the code that uses them.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* Copy of an already encoded source dword with the 12 bits starting at
   R200_VPI_IN_X_SHIFT replaced by R200_VPI_IN_SELECT_ZERO in the X, Y, Z and W
   select positions. */
#define ZERO_SRC_OF(reg) (((reg) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))

#define ZERO_SRC_0 (ZERO_SRC_OF(o_inst->src0))

#define ZERO_SRC_1 (ZERO_SRC_OF(o_inst->src1))

#define ZERO_SRC_2 (ZERO_SRC_OF(o_inst->src2))

/* Copy of an already encoded source dword with its low four bits replaced by
   the value 9.
   NOTE(review): going by the fglrx note above, 9 is presumably the
   VSF_IN_CLASS_NONE encoding - confirm against r200_vertprog.h. */
#define UNUSED_SRC_OF(reg) (((reg) & ~15) | 9)

#define UNUSED_SRC_0 (UNUSED_SRC_OF(o_inst->src0))

#define UNUSED_SRC_1 (UNUSED_SRC_OF(o_inst->src1))

#define UNUSED_SRC_2 (UNUSED_SRC_OF(o_inst->src2))
392
393
394/**
395 * Generate an R200 vertex program from Mesa's internal representation.
396 *
397 * \return  GL_TRUE for success, GL_FALSE for failure.
398 */
399static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp)
400{
401   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
402   struct prog_instruction *vpi;
403   int i;
404   VERTEX_SHADER_INSTRUCTION *o_inst;
405   unsigned long operands;
406   int are_srcs_scalar;
407   unsigned long hw_op;
408   int dofogfix = 0;
409   int fog_temp_i = 0;
410   int free_inputs;
411   int array_count = 0;
412   int u_temp_used;
413
414   vp->native = GL_FALSE;
415   vp->translated = GL_TRUE;
416   vp->fogmode = ctx->Fog.Mode;
417
418   if (mesa_vp->Base.NumInstructions == 0)
419      return GL_FALSE;
420
421#if 0
422   if ((mesa_vp->Base.InputsRead &
423      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
424      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
425      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
426      if (R200_DEBUG & DEBUG_FALLBACKS) {
427	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
428	    mesa_vp->Base.InputsRead);
429      }
430      return GL_FALSE;
431   }
432#endif
433
434   if ((mesa_vp->Base.OutputsWritten &
435      ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
436      (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
437      (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
438      (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
439      if (R200_DEBUG & DEBUG_FALLBACKS) {
440	 fprintf(stderr, "can't handle vert prog outputs 0x%x\n",
441	    mesa_vp->Base.OutputsWritten);
442      }
443      return GL_FALSE;
444   }
445
446   if (mesa_vp->IsNVProgram) {
447   /* subtle differences in spec like guaranteed initialized regs could cause
448      headaches. Might want to remove the driconf option to enable it completely */
449      return GL_FALSE;
450   }
451   /* Initial value should be last tmp reg that hw supports.
452      Strangely enough r300 doesnt mind even though these would be out of range.
453      Smart enough to realize that it doesnt need it? */
454   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
455   struct prog_src_register src[3];
456   struct prog_dst_register dst;
457
458/* FIXME: is changing the prog safe to do here? */
459   if (mesa_vp->IsPositionInvariant &&
460      /* make sure we only do this once */
461       !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
462	 _mesa_insert_mvp_code(ctx, mesa_vp);
463      }
464
465   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
466      base e isn't directly available neither. */
467   if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
468      struct gl_program_parameter_list *paramList;
469      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
470      paramList = mesa_vp->Base.Parameters;
471      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
472   }
473
474   vp->pos_end = 0;
475   mesa_vp->Base.NumNativeInstructions = 0;
476   if (mesa_vp->Base.Parameters)
477      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
478   else
479      mesa_vp->Base.NumNativeParameters = 0;
480
481   for(i = 0; i < VERT_ATTRIB_MAX; i++)
482      vp->inputs[i] = -1;
483   for(i = 0; i < 15; i++)
484      vp->inputmap_rev[i] = 255;
485   free_inputs = 0x2ffd;
486
487/* fglrx uses fixed inputs as follows for conventional attribs.
488   generic attribs use non-fixed assignment, fglrx will always use the
489   lowest attrib values available. We'll just do the same.
490   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
491   and 13 in a hw vertex prog.
492   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
493   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
494   Additionally, not more than 12 arrays in total are possible I think.
495   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
496   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
497   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
498   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
499*/
500
501/* attr 4,5 and 13 are only used with generic attribs.
502   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
503   not possibe to use with vertex progs as it is lacking in vert prog specification) */
504/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
505   if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
506      vp->inputs[VERT_ATTRIB_POS] = 0;
507      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
508      free_inputs &= ~(1 << 0);
509      array_count++;
510   }
511   if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
512      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
513      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
514      array_count++;
515   }
516   if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
517      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
518      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
519      array_count++;
520   }
521   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
522      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
523      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
524      free_inputs &= ~(1 << 2);
525      array_count++;
526   }
527   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
528      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
529      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
530      free_inputs &= ~(1 << 3);
531      array_count++;
532   }
533   if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
534      vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
535      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
536      array_count++;
537   }
538   for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
539      if (mesa_vp->Base.InputsRead & (1 << i)) {
540	 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
541	 vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i;
542	 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
543	 array_count++;
544      }
545   }
546   /* using VERT_ATTRIB_TEX6/7 would be illegal */
547   /* completely ignore aliasing? */
548   for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
549      int j;
550   /* completely ignore aliasing? */
551      if (mesa_vp->Base.InputsRead & (1 << i)) {
552	 array_count++;
553	 if (array_count > 12) {
554	    if (R200_DEBUG & DEBUG_FALLBACKS) {
555	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
556	    }
557	    return GL_FALSE;
558	 }
559	 for (j = 0; j < 14; j++) {
560	    /* will always find one due to limited array_count */
561	    if (free_inputs & (1 << j)) {
562	       free_inputs &= ~(1 << j);
563	       vp->inputs[i] = j;
564	       if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */
565	       else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */
566	       else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */
567	       break;
568	    }
569	 }
570      }
571   }
572
573   if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
574      if (R200_DEBUG & DEBUG_FALLBACKS) {
575	 fprintf(stderr, "can't handle vert prog without position output\n");
576      }
577      return GL_FALSE;
578   }
579   if (free_inputs & 1) {
580      if (R200_DEBUG & DEBUG_FALLBACKS) {
581	 fprintf(stderr, "can't handle vert prog without position input\n");
582      }
583      return GL_FALSE;
584   }
585
586   o_inst = vp->instr;
587   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
588      operands = op_operands(vpi->Opcode);
589      are_srcs_scalar = operands & SCALAR_FLAG;
590      operands &= OP_MASK;
591
592      for(i = 0; i < operands; i++) {
593	 src[i] = vpi->SrcReg[i];
594	 /* hack up default attrib values as per spec as swizzling.
595	    normal, fog, secondary color. Crazy?
596	    May need more if we don't submit vec4 elements? */
597	 if (src[i].File == PROGRAM_INPUT) {
598	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
599	       int j;
600	       for (j = 0; j < 4; j++) {
601		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
602		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
603		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
604		  }
605	       }
606	    }
607	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
608	       int j;
609	       for (j = 0; j < 4; j++) {
610		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
611		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
612		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
613		  }
614	       }
615	    }
616	    else if (src[i].Index == VERT_ATTRIB_FOG) {
617	       int j;
618	       for (j = 0; j < 4; j++) {
619		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
620		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
621		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
622		  }
623		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
624			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
625		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
626		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
627		  }
628	       }
629	    }
630	 }
631      }
632
633      if(operands == 3){
634	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
635	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
636		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
637		VSF_FLAG_ALL);
638
639	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
640		  SWIZZLE_X, SWIZZLE_Y,
641		  SWIZZLE_Z, SWIZZLE_W,
642		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
643
644	    o_inst->src1 = ZERO_SRC_0;
645	    o_inst->src2 = UNUSED_SRC_1;
646	    o_inst++;
647
648	    src[2].File = PROGRAM_TEMPORARY;
649	    src[2].Index = u_temp_i;
650	    src[2].RelAddr = 0;
651	    u_temp_i--;
652	 }
653      }
654
655      if(operands >= 2){
656	 if( CMP_SRCS(src[1], src[0]) ){
657	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
658		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
659		VSF_FLAG_ALL);
660
661	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
662		  SWIZZLE_X, SWIZZLE_Y,
663		  SWIZZLE_Z, SWIZZLE_W,
664		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
665
666	    o_inst->src1 = ZERO_SRC_0;
667	    o_inst->src2 = UNUSED_SRC_1;
668	    o_inst++;
669
670	    src[0].File = PROGRAM_TEMPORARY;
671	    src[0].Index = u_temp_i;
672	    src[0].RelAddr = 0;
673	    u_temp_i--;
674	 }
675      }
676
677      dst = vpi->DstReg;
678      if (dst.File == PROGRAM_OUTPUT &&
679	  dst.Index == VERT_RESULT_FOGC &&
680	  dst.WriteMask & WRITEMASK_X) {
681	  fog_temp_i = u_temp_i;
682	  dst.File = PROGRAM_TEMPORARY;
683	  dst.Index = fog_temp_i;
684	  dofogfix = 1;
685	  u_temp_i--;
686      }
687
688      /* These ops need special handling. */
689      switch(vpi->Opcode){
690      case OPCODE_POW:
691/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
692   So may need to insert additional instruction */
693	 if ((src[0].File == src[1].File) &&
694	     (src[0].Index == src[1].Index)) {
695	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
696		   t_dst_mask(dst.WriteMask));
697	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
698		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
699		   SWIZZLE_ZERO,
700		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
701		   SWIZZLE_ZERO,
702		   t_src_class(src[0].File),
703		   src[0].NegateBase) | (src[0].RelAddr << 4);
704	    o_inst->src1 = UNUSED_SRC_0;
705	    o_inst->src2 = UNUSED_SRC_0;
706	 }
707	 else {
708	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
709		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
710		   VSF_FLAG_ALL);
711	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
712		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
713		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
714		   t_src_class(src[0].File),
715		   src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
716	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
717		   SWIZZLE_ZERO, SWIZZLE_ZERO,
718		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
719		   t_src_class(src[1].File),
720		   src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
721	    o_inst->src2 = UNUSED_SRC_1;
722	    o_inst++;
723
724	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
725		   t_dst_mask(dst.WriteMask));
726	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
727		   VSF_IN_COMPONENT_X,
728		   VSF_IN_COMPONENT_Y,
729		   VSF_IN_COMPONENT_Z,
730		   VSF_IN_COMPONENT_W,
731		   VSF_IN_CLASS_TMP,
732		   VSF_FLAG_NONE);
733	    o_inst->src1 = UNUSED_SRC_0;
734	    o_inst->src2 = UNUSED_SRC_0;
735	    u_temp_i--;
736	 }
737	 goto next;
738
739      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
740      case OPCODE_SWZ:
741	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
742		t_dst_mask(dst.WriteMask));
743	 o_inst->src0 = t_src(vp, &src[0]);
744	 o_inst->src1 = ZERO_SRC_0;
745	 o_inst->src2 = UNUSED_SRC_1;
746	 goto next;
747
748      case OPCODE_MAD:
749	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
750	    instead (requiring 2 clocks) if all inputs are in temp memory
751	    (and, only if they actually reference 3 distinct temps) */
752	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
753	    src[1].File == PROGRAM_TEMPORARY &&
754	    src[2].File == PROGRAM_TEMPORARY &&
755	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
756	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
757	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
758	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
759
760	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
761	    t_dst_mask(dst.WriteMask));
762	 o_inst->src0 = t_src(vp, &src[0]);
763#if 0
764if ((o_inst - vp->instr) == 31) {
765/* fix up the broken vertex program of quake4 demo... */
766o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
767			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
768			t_src_class(src[1].File),
769			src[1].NegateBase) | (src[1].RelAddr << 4);
770o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
771			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
772			t_src_class(src[1].File),
773			src[1].NegateBase) | (src[1].RelAddr << 4);
774}
775else {
776	 o_inst->src1 = t_src(vp, &src[1]);
777	 o_inst->src2 = t_src(vp, &src[2]);
778}
779#else
780	 o_inst->src1 = t_src(vp, &src[1]);
781	 o_inst->src2 = t_src(vp, &src[2]);
782#endif
783	 goto next;
784
785      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
786	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
787		t_dst_mask(dst.WriteMask));
788
789	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
790		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
791		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
792		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
793		SWIZZLE_ZERO,
794		t_src_class(src[0].File),
795		src[0].NegateBase) | (src[0].RelAddr << 4);
796
797	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
798		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
799		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
800		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
801		SWIZZLE_ZERO,
802		t_src_class(src[1].File),
803		src[1].NegateBase) | (src[1].RelAddr << 4);
804
805	 o_inst->src2 = UNUSED_SRC_1;
806	 goto next;
807
808      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
809	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
810		t_dst_mask(dst.WriteMask));
811
812	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
813		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
814		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
815		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
816		VSF_IN_COMPONENT_ONE,
817		t_src_class(src[0].File),
818		src[0].NegateBase) | (src[0].RelAddr << 4);
819	 o_inst->src1 = t_src(vp, &src[1]);
820	 o_inst->src2 = UNUSED_SRC_1;
821	 goto next;
822
823      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
824	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
825		t_dst_mask(dst.WriteMask));
826
827	 o_inst->src0 = t_src(vp, &src[0]);
828	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
829		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
830		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
831		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
832		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
833		t_src_class(src[1].File),
834		(!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
835	 o_inst->src2 = UNUSED_SRC_1;
836	 goto next;
837
838      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
839	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
840		t_dst_mask(dst.WriteMask));
841
842	 o_inst->src0=t_src(vp, &src[0]);
843	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
844		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
845		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
846		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
847		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
848		t_src_class(src[0].File),
849		(!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
850	 o_inst->src2 = UNUSED_SRC_1;
851	 goto next;
852
853      case OPCODE_FLR:
854      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
855         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
856
857	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
858	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
859	    t_dst_mask(dst.WriteMask));
860
861	 o_inst->src0 = t_src(vp, &src[0]);
862	 o_inst->src1 = UNUSED_SRC_0;
863	 o_inst->src2 = UNUSED_SRC_1;
864	 o_inst++;
865
866	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
867		t_dst_mask(dst.WriteMask));
868
869	 o_inst->src0 = t_src(vp, &src[0]);
870	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
871		VSF_IN_COMPONENT_X,
872		VSF_IN_COMPONENT_Y,
873		VSF_IN_COMPONENT_Z,
874		VSF_IN_COMPONENT_W,
875		VSF_IN_CLASS_TMP,
876		/* Not 100% sure about this */
877		(!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
878
879	 o_inst->src2 = UNUSED_SRC_0;
880	 u_temp_i--;
881	 goto next;
882
883      case OPCODE_XPD:
884	 /* mul r0, r1.yzxw, r2.zxyw
885	    mad r0, -r2.yzxw, r1.zxyw, r0
886	  */
887	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
888	    src[1].File == PROGRAM_TEMPORARY &&
889	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
890	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
891
892	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
893	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
894	    t_dst_mask(dst.WriteMask));
895
896	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
897		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
898		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
899		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
900		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
901		t_src_class(src[0].File),
902		src[0].NegateBase) | (src[0].RelAddr << 4);
903
904	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
905		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
906		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
907		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
908		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
909		t_src_class(src[1].File),
910		src[1].NegateBase) | (src[1].RelAddr << 4);
911
912	 o_inst->src2 = UNUSED_SRC_1;
913	 o_inst++;
914	 u_temp_i--;
915
916	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
917		t_dst_mask(dst.WriteMask));
918
919	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
920		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
921		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
922		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
923		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
924		t_src_class(src[1].File),
925		(!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
926
927	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
928		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
929		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
930		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
931		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
932		t_src_class(src[0].File),
933		src[0].NegateBase) | (src[0].RelAddr << 4);
934
935	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
936		VSF_IN_COMPONENT_X,
937		VSF_IN_COMPONENT_Y,
938		VSF_IN_COMPONENT_Z,
939		VSF_IN_COMPONENT_W,
940		VSF_IN_CLASS_TMP,
941		VSF_FLAG_NONE);
942	 goto next;
943
944      case OPCODE_END:
945	 assert(0);
946      default:
947	 break;
948      }
949
950      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
951	    t_dst_mask(dst.WriteMask));
952
953      if(are_srcs_scalar){
954	 switch(operands){
955	    case 1:
956		o_inst->src0 = t_src_scalar(vp, &src[0]);
957		o_inst->src1 = UNUSED_SRC_0;
958		o_inst->src2 = UNUSED_SRC_1;
959	    break;
960
961	    case 2:
962		o_inst->src0 = t_src_scalar(vp, &src[0]);
963		o_inst->src1 = t_src_scalar(vp, &src[1]);
964		o_inst->src2 = UNUSED_SRC_1;
965	    break;
966
967	    case 3:
968		o_inst->src0 = t_src_scalar(vp, &src[0]);
969		o_inst->src1 = t_src_scalar(vp, &src[1]);
970		o_inst->src2 = t_src_scalar(vp, &src[2]);
971	    break;
972
973	    default:
974		fprintf(stderr, "illegal number of operands %lu\n", operands);
975		exit(-1);
976	    break;
977	 }
978      } else {
979	 switch(operands){
980	    case 1:
981		o_inst->src0 = t_src(vp, &src[0]);
982		o_inst->src1 = UNUSED_SRC_0;
983		o_inst->src2 = UNUSED_SRC_1;
984	    break;
985
986	    case 2:
987		o_inst->src0 = t_src(vp, &src[0]);
988		o_inst->src1 = t_src(vp, &src[1]);
989		o_inst->src2 = UNUSED_SRC_1;
990	    break;
991
992	    case 3:
993		o_inst->src0 = t_src(vp, &src[0]);
994		o_inst->src1 = t_src(vp, &src[1]);
995		o_inst->src2 = t_src(vp, &src[2]);
996	    break;
997
998	    default:
999		fprintf(stderr, "illegal number of operands %lu\n", operands);
1000		exit(-1);
1001	    break;
1002	 }
1003      }
1004      next:
1005
1006      if (dofogfix) {
1007	 o_inst++;
1008	 if (vp->fogmode == GL_EXP) {
1009	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1010		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1011		VSF_FLAG_X);
1012	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1013	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1014	    o_inst->src2 = UNUSED_SRC_1;
1015	    o_inst++;
1016	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1017		R200_VSF_OUT_CLASS_RESULT_FOGC,
1018		VSF_FLAG_X);
1019	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1020	    o_inst->src1 = UNUSED_SRC_0;
1021	    o_inst->src2 = UNUSED_SRC_1;
1022	 }
1023	 else if (vp->fogmode == GL_EXP2) {
1024	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1025		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1026		VSF_FLAG_X);
1027	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1028	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1029	    o_inst->src2 = UNUSED_SRC_1;
1030	    o_inst++;
1031	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1032		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1033		VSF_FLAG_X);
1034	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1035	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1036	    o_inst->src2 = UNUSED_SRC_1;
1037	    o_inst++;
1038	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1039		R200_VSF_OUT_CLASS_RESULT_FOGC,
1040		VSF_FLAG_X);
1041	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1042	    o_inst->src1 = UNUSED_SRC_0;
1043	    o_inst->src2 = UNUSED_SRC_1;
1044	 }
1045	 else { /* fogmode == GL_LINEAR */
1046		/* could do that with single op (dot) if using params like
1047		   with fixed function pipeline fog */
1048	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1049		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1050		VSF_FLAG_X);
1051	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1052	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1053	    o_inst->src2 = UNUSED_SRC_1;
1054	    o_inst++;
1055	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1056		R200_VSF_OUT_CLASS_RESULT_FOGC,
1057		VSF_FLAG_X);
1058	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1059	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1060	    o_inst->src2 = UNUSED_SRC_1;
1061
1062	 }
1063         dofogfix = 0;
1064      }
1065
1066      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1067      if (mesa_vp->Base.NumNativeTemporaries <
1068	 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
1069	 mesa_vp->Base.NumNativeTemporaries =
1070	    mesa_vp->Base.NumTemporaries + u_temp_used;
1071      }
1072      if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1073	 if (R200_DEBUG & DEBUG_FALLBACKS) {
1074	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
1075	 }
1076	 return GL_FALSE;
1077      }
1078      u_temp_i = R200_VSF_MAX_TEMPS - 1;
1079      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1080	 mesa_vp->Base.NumNativeInstructions = 129;
1081	 if (R200_DEBUG & DEBUG_FALLBACKS) {
1082	    fprintf(stderr, "more than 128 native instructions\n");
1083	 }
1084	 return GL_FALSE;
1085      }
1086      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1087	 vp->pos_end = (o_inst - vp->instr);
1088      }
1089   }
1090
1091   vp->native = GL_TRUE;
1092   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1093#if 0
1094   fprintf(stderr, "hw program:\n");
1095   for(i=0; i < vp->program.length; i++)
1096      fprintf(stderr, "%08x\n", vp->instr[i]);
1097#endif
1098   return GL_TRUE;
1099}
1100
1101void r200SetupVertexProg( GLcontext *ctx ) {
1102   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1103   struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1104   GLboolean fallback;
1105   GLint i;
1106
1107   if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1108      rmesa->curr_vp_hw = NULL;
1109      r200_translate_vertex_program(ctx, vp);
1110   }
1111   /* could optimize setting up vertex progs away for non-tcl hw */
1112   fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1113      rmesa->r200Screen->drmSupportsVertexProgram);
1114   TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1115   if (rmesa->TclFallback) return;
1116
1117   R200_STATECHANGE( rmesa, vap );
1118   /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1119             maybe only when using more than 64 inst / 96 param? */
1120   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1121
1122   R200_STATECHANGE( rmesa, pvs );
1123
1124   rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1125      ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1126      (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1127   rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1128      (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1129
1130   /* maybe user clip planes just work with vertex progs... untested */
1131   if (ctx->Transform.ClipPlanesEnabled) {
1132      R200_STATECHANGE( rmesa, tcl );
1133      if (vp->mesa_program.IsPositionInvariant) {
1134	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1135      }
1136      else {
1137	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1138      }
1139   }
1140
1141   if (vp != rmesa->curr_vp_hw) {
1142      GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1143      drm_radeon_cmd_header_t tmp;
1144
1145      R200_STATECHANGE( rmesa, vpi[0] );
1146      R200_STATECHANGE( rmesa, vpi[1] );
1147
1148      /* FIXME: what about using a memcopy... */
1149      for (i = 0; (i < 64) && i < count; i++) {
1150	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1151	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1152	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1153	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1154      }
1155      /* hack up the cmd_size so not the whole state atom is emitted always.
1156         This may require some more thought, we may emit half progs on lost state, but
1157         hopefully it won't matter?
1158         WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1159         packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1160      rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1161      tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1162      tmp.veclinear.count = (count > 64) ? 64 : count;
1163      rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1164      if (count > 64) {
1165	 for (i = 0; i < (count - 64); i++) {
1166	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1167	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1168	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1169	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1170	 }
1171	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1172	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1173	 tmp.veclinear.count = count - 64;
1174	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1175      }
1176      rmesa->curr_vp_hw = vp;
1177   }
1178}
1179
1180
1181static void
1182r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1183{
1184   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1185
1186   switch(target){
1187   case GL_VERTEX_PROGRAM_ARB:
1188      rmesa->curr_vp_hw = NULL;
1189      break;
1190   default:
1191      _mesa_problem(ctx, "Target not supported yet!");
1192      break;
1193   }
1194}
1195
1196static struct gl_program *
1197r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1198{
1199   struct r200_vertex_program *vp;
1200
1201   switch(target){
1202   case GL_VERTEX_PROGRAM_ARB:
1203      vp = CALLOC_STRUCT(r200_vertex_program);
1204      return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1205   case GL_FRAGMENT_PROGRAM_ARB:
1206   case GL_FRAGMENT_PROGRAM_NV:
1207      return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1208   default:
1209      _mesa_problem(ctx, "Bad target in r200NewProgram");
1210   }
1211   return NULL;
1212}
1213
1214
1215static void
1216r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1217{
1218   _mesa_delete_program(ctx, prog);
1219}
1220
1221static void
1222r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1223{
1224   struct r200_vertex_program *vp = (void *)prog;
1225   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1226
1227   switch(target) {
1228   case GL_VERTEX_PROGRAM_ARB:
1229      vp->translated = GL_FALSE;
1230      vp->fogpidx = 0;
1231/*      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1232      r200_translate_vertex_program(ctx, vp);
1233      rmesa->curr_vp_hw = NULL;
1234      break;
1235   case GL_FRAGMENT_SHADER_ATI:
1236      rmesa->afs_loaded = NULL;
1237      break;
1238   }
1239   /* need this for tcl fallbacks */
1240   _tnl_program_string(ctx, target, prog);
1241}
1242
1243static GLboolean
1244r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1245{
1246   struct r200_vertex_program *vp = (void *)prog;
1247
1248   switch(target){
1249   case GL_VERTEX_STATE_PROGRAM_NV:
1250   case GL_VERTEX_PROGRAM_ARB:
1251      if (!vp->translated) {
1252	 r200_translate_vertex_program(ctx, vp);
1253      }
1254     /* does not take parameters etc. into account */
1255      return vp->native;
1256   default:
1257      _mesa_problem(ctx, "Bad target in r200NewProgram");
1258   }
1259   return 0;
1260}
1261
1262void r200InitShaderFuncs(struct dd_function_table *functions)
1263{
1264   functions->NewProgram = r200NewProgram;
1265   functions->BindProgram = r200BindProgram;
1266   functions->DeleteProgram = r200DeleteProgram;
1267   functions->ProgramStringNotify = r200ProgramStringNotify;
1268   functions->IsProgramNative = r200IsProgramNative;
1269}
1270