1/**************************************************************************
2
3Copyright (C) 2005 Aapo Tahkola.
4
5All Rights Reserved.
6
7Permission is hereby granted, free of charge, to any person obtaining a
8copy of this software and associated documentation files (the "Software"),
9to deal in the Software without restriction, including without limitation
10on the rights to use, copy, modify, merge, publish, distribute, sub
11license, and/or sell copies of the Software, and to permit persons to whom
12the Software is furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice (including the next
15paragraph) shall be included in all copies or substantial portions of the
16Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26**************************************************************************/
27
28/*
29 * Authors:
30 *   Aapo Tahkola <aet@rasterburn.org>
31 *   Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "program/program.h"
37#include "program/prog_instruction.h"
38#include "program/prog_parameter.h"
39#include "program/prog_statevars.h"
40#include "program/programopt.h"
41#include "tnl/tnl.h"
42
43#include "r200_context.h"
44#include "r200_vertprog.h"
45#include "r200_ioctl.h"
46#include "r200_tcl.h"
47
48#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51    SWIZZLE_W != VSF_IN_COMPONENT_W || \
52    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54    WRITEMASK_X != VSF_FLAG_X || \
55    WRITEMASK_Y != VSF_FLAG_Y || \
56    WRITEMASK_Z != VSF_FLAG_Z || \
57    WRITEMASK_W != VSF_FLAG_W
58#error Cannot change these!
59#endif
60
61#define SCALAR_FLAG (1<<31)
62#define FLAG_MASK (1<<31)
63#define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
64#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66static struct{
67   char *name;
68   int opcode;
69   unsigned long ip; /* number of input operands and flags */
70}op_names[]={
71   OPN(ABS, 1),
72   OPN(ADD, 2),
73   OPN(ARL, 1|SCALAR_FLAG),
74   OPN(DP3, 2),
75   OPN(DP4, 2),
76   OPN(DPH, 2),
77   OPN(DST, 2),
78   OPN(EX2, 1|SCALAR_FLAG),
79   OPN(EXP, 1|SCALAR_FLAG),
80   OPN(FLR, 1),
81   OPN(FRC, 1),
82   OPN(LG2, 1|SCALAR_FLAG),
83   OPN(LIT, 1),
84   OPN(LOG, 1|SCALAR_FLAG),
85   OPN(MAD, 3),
86   OPN(MAX, 2),
87   OPN(MIN, 2),
88   OPN(MOV, 1),
89   OPN(MUL, 2),
90   OPN(POW, 2|SCALAR_FLAG),
91   OPN(RCP, 1|SCALAR_FLAG),
92   OPN(RSQ, 1|SCALAR_FLAG),
93   OPN(SGE, 2),
94   OPN(SLT, 2),
95   OPN(SUB, 2),
96   OPN(SWZ, 1),
97   OPN(XPD, 2),
98   OPN(PRINT, 0),
99   OPN(END, 0),
100};
101#undef OPN
102
103static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
104{
105   r200ContextPtr rmesa = R200_CONTEXT( ctx );
106   GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
107   int pi;
108   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109   struct gl_program_parameter_list *paramList;
110   drm_radeon_cmd_header_t tmp;
111
112   R200_STATECHANGE( rmesa, vpp[0] );
113   R200_STATECHANGE( rmesa, vpp[1] );
114   assert(mesa_vp->Base.Parameters);
115   _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
116   paramList = mesa_vp->Base.Parameters;
117
118   if(paramList->NumParameters > R200_VSF_MAX_PARAM){
119      fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
120      return GL_FALSE;
121   }
122
123   for(pi = 0; pi < paramList->NumParameters; pi++) {
124      switch(paramList->Parameters[pi].Type) {
125      case PROGRAM_STATE_VAR:
126      case PROGRAM_NAMED_PARAM:
127      //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128      case PROGRAM_CONSTANT:
129	 *fcmd++ = paramList->ParameterValues[pi][0].f;
130	 *fcmd++ = paramList->ParameterValues[pi][1].f;
131	 *fcmd++ = paramList->ParameterValues[pi][2].f;
132	 *fcmd++ = paramList->ParameterValues[pi][3].f;
133	 break;
134      default:
135	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
136	 break;
137      }
138      if (pi == 95) {
139	 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
140      }
141   }
142   /* hack up the cmd_size so not the whole state atom is emitted always. */
143   rmesa->hw.vpp[0].cmd_size =
144      1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
145   tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
146   tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
147   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
148   if (paramList->NumParameters > 96) {
149      rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
150      tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
151      tmp.veclinear.count = paramList->NumParameters - 96;
152      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
153   }
154   return GL_TRUE;
155}
156
157static INLINE unsigned long t_dst_mask(GLuint mask)
158{
159   /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160   return mask & VSF_FLAG_ALL;
161}
162
163static unsigned long t_dst(struct prog_dst_register *dst)
164{
165   switch(dst->File) {
166   case PROGRAM_TEMPORARY:
167      return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
168	 | R200_VSF_OUT_CLASS_TMP);
169   case PROGRAM_OUTPUT:
170      switch (dst->Index) {
171      case VERT_RESULT_HPOS:
172	 return R200_VSF_OUT_CLASS_RESULT_POS;
173      case VERT_RESULT_COL0:
174	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
175      case VERT_RESULT_COL1:
176	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
177	    | R200_VSF_OUT_CLASS_RESULT_COLOR);
178      case VERT_RESULT_FOGC:
179	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
180      case VERT_RESULT_TEX0:
181      case VERT_RESULT_TEX1:
182      case VERT_RESULT_TEX2:
183      case VERT_RESULT_TEX3:
184      case VERT_RESULT_TEX4:
185      case VERT_RESULT_TEX5:
186	 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
187	    | R200_VSF_OUT_CLASS_RESULT_TEXC);
188      case VERT_RESULT_PSIZ:
189	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
190      default:
191	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
192	 exit(0);
193	 return 0;
194      }
195   case PROGRAM_ADDRESS:
196      assert (dst->Index == 0);
197      return R200_VSF_OUT_CLASS_ADDR;
198   default:
199      fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
200      exit(0);
201      return 0;
202   }
203}
204
205static unsigned long t_src_class(gl_register_file file)
206{
207
208   switch(file){
209   case PROGRAM_TEMPORARY:
210      return VSF_IN_CLASS_TMP;
211
212   case PROGRAM_INPUT:
213      return VSF_IN_CLASS_ATTR;
214
215   case PROGRAM_LOCAL_PARAM:
216   case PROGRAM_ENV_PARAM:
217   case PROGRAM_NAMED_PARAM:
218   case PROGRAM_CONSTANT:
219   case PROGRAM_STATE_VAR:
220      return VSF_IN_CLASS_PARAM;
221   /*
222   case PROGRAM_OUTPUT:
223   case PROGRAM_WRITE_ONLY:
224   case PROGRAM_ADDRESS:
225   */
226   default:
227      fprintf(stderr, "problem in %s", __FUNCTION__);
228      exit(0);
229   }
230}
231
232static INLINE unsigned long t_swizzle(GLubyte swizzle)
233{
234/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
235   return swizzle;
236}
237
#if 0
/* Debug helper (compiled out): print the whole vp->inputs[] mapping to
 * stderr, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
   }
   fprintf(stderr, ">\n");
}
#endif
255
256static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
257{
258/*
259   int i;
260   int max_reg = -1;
261*/
262   if(src->File == PROGRAM_INPUT){
263/*      if(vp->inputs[src->Index] != -1)
264	 return vp->inputs[src->Index];
265
266      for(i=0; i < VERT_ATTRIB_MAX; i++)
267	 if(vp->inputs[i] > max_reg)
268	    max_reg = vp->inputs[i];
269
270      vp->inputs[src->Index] = max_reg+1;*/
271
272      //vp_dump_inputs(vp, __FUNCTION__);
273      assert(vp->inputs[src->Index] != -1);
274      return vp->inputs[src->Index];
275   } else {
276      if (src->Index < 0) {
277	 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
278	 return 0;
279      }
280      return src->Index;
281   }
282}
283
284static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
285{
286
287   return MAKE_VSF_SOURCE(t_src_index(vp, src),
288			t_swizzle(GET_SWZ(src->Swizzle, 0)),
289			t_swizzle(GET_SWZ(src->Swizzle, 1)),
290			t_swizzle(GET_SWZ(src->Swizzle, 2)),
291			t_swizzle(GET_SWZ(src->Swizzle, 3)),
292			t_src_class(src->File),
293			src->Negate) | (src->RelAddr << 4);
294}
295
296static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
297{
298
299   return MAKE_VSF_SOURCE(t_src_index(vp, src),
300			t_swizzle(GET_SWZ(src->Swizzle, 0)),
301			t_swizzle(GET_SWZ(src->Swizzle, 0)),
302			t_swizzle(GET_SWZ(src->Swizzle, 0)),
303			t_swizzle(GET_SWZ(src->Swizzle, 0)),
304			t_src_class(src->File),
305			src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
306}
307
308static unsigned long t_opcode(enum prog_opcode opcode)
309{
310
311   switch(opcode){
312   case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
313   /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
314    * seems to ignore neg offsets which isn't quite correct...
315    */
316   case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
317   case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
318   case OPCODE_DST: return R200_VPI_OUT_OP_DST;
319   case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
320   case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
321   case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
322   case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
323   case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
324   case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
325   case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
326   case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
327   case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
328   case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
329   case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
330   case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
331   case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
332
333   default:
334      fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
335   }
336   exit(-1);
337   return 0;
338}
339
340static unsigned long op_operands(enum prog_opcode opcode)
341{
342   int i;
343
344   /* Can we trust mesas opcodes to be in order ? */
345   for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
346      if(op_names[i].opcode == opcode)
347	 return op_names[i].ip;
348
349   fprintf(stderr, "op %d not found in op_names\n", opcode);
350   exit(-1);
351   return 0;
352}
353
/* TODO: Get rid of t_src_class call */
/* True when sources a and b address different registers (their index or
   relative-addressing flag differs) while both are parameters or both are
   vertex attributes.  The translator uses this to decide that one of the
   two has to be copied into a temporary first.
   Note: the arguments are evaluated several times, so they must be free of
   side effects. */
#define CMP_SRCS(a, b) \
   ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
    ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
      t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
     (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
      t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 encodes unused sources as follows:
   unused but necessary sources are the same as the previous source, zeroed out.
   unnecessary sources are the same as the previous source but with
   VSF_IN_CLASS_NONE set.
   I.e. an add (2 args) used as a mov has its 2nd arg zeroed out and its 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if this is strictly necessary. */

/* Use these simpler definitions. They read the current instruction through
   "o_inst", so they obviously must not be used on sources that have not been
   set up yet.
   They are NOT semantically equivalent to the r300 ones; porting requires code
   changes. */

/* Copy of an encoded source with all four component selects forced to ZERO.
   NOTE(review): 0xfff presumably spans the four 3-bit component selects
   starting at R200_VPI_IN_X_SHIFT - confirm against r200_vertprog.h. */
#define VP_SRC_ZERO_SWIZZLE(hw_src) \
   (((hw_src) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))

/* Copy of an encoded source with its low four bits replaced by 9.
   NOTE(review): 9 is presumably VSF_IN_CLASS_NONE, as described above - confirm. */
#define VP_SRC_UNUSED(hw_src) (((hw_src) & ~15) | 9)

#define ZERO_SRC_0 VP_SRC_ZERO_SWIZZLE(o_inst->src0)

#define ZERO_SRC_1 VP_SRC_ZERO_SWIZZLE(o_inst->src1)

#define ZERO_SRC_2 VP_SRC_ZERO_SWIZZLE(o_inst->src2)

#define UNUSED_SRC_0 VP_SRC_UNUSED(o_inst->src0)

#define UNUSED_SRC_1 VP_SRC_UNUSED(o_inst->src1)

#define UNUSED_SRC_2 VP_SRC_UNUSED(o_inst->src2)
392
393
394/**
395 * Generate an R200 vertex program from Mesa's internal representation.
396 *
397 * \return  GL_TRUE for success, GL_FALSE for failure.
398 */
399static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
400{
401   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
402   struct prog_instruction *vpi;
403   int i;
404   VERTEX_SHADER_INSTRUCTION *o_inst;
405   unsigned long operands;
406   int are_srcs_scalar;
407   unsigned long hw_op;
408   int dofogfix = 0;
409   int fog_temp_i = 0;
410   int free_inputs;
411   int array_count = 0;
412   int u_temp_used;
413
414   vp->native = GL_FALSE;
415   vp->translated = GL_TRUE;
416   vp->fogmode = ctx->Fog.Mode;
417
418   if (mesa_vp->Base.NumInstructions == 0)
419      return GL_FALSE;
420
421#if 0
422   if ((mesa_vp->Base.InputsRead &
423      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
424      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
425      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
426      if (R200_DEBUG & RADEON_FALLBACKS) {
427	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
428	    mesa_vp->Base.InputsRead);
429      }
430      return GL_FALSE;
431   }
432#endif
433
434   if ((mesa_vp->Base.OutputsWritten &
435      ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
436      (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
437      (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
438      (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
439      if (R200_DEBUG & RADEON_FALLBACKS) {
440	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
441                 (unsigned long long) mesa_vp->Base.OutputsWritten);
442      }
443      return GL_FALSE;
444   }
445
446   if (mesa_vp->IsNVProgram) {
447   /* subtle differences in spec like guaranteed initialized regs could cause
448      headaches. Might want to remove the driconf option to enable it completely */
449      return GL_FALSE;
450   }
451   /* Initial value should be last tmp reg that hw supports.
452      Strangely enough r300 doesnt mind even though these would be out of range.
453      Smart enough to realize that it doesnt need it? */
454   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
455   struct prog_src_register src[3];
456   struct prog_dst_register dst;
457
458/* FIXME: is changing the prog safe to do here? */
459   if (mesa_vp->IsPositionInvariant &&
460      /* make sure we only do this once */
461       !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
462	 _mesa_insert_mvp_code(ctx, mesa_vp);
463      }
464
465   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
466      base e isn't directly available neither. */
467   if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
468      struct gl_program_parameter_list *paramList;
469      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
470      paramList = mesa_vp->Base.Parameters;
471      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
472   }
473
474   vp->pos_end = 0;
475   mesa_vp->Base.NumNativeInstructions = 0;
476   if (mesa_vp->Base.Parameters)
477      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
478   else
479      mesa_vp->Base.NumNativeParameters = 0;
480
481   for(i = 0; i < VERT_ATTRIB_MAX; i++)
482      vp->inputs[i] = -1;
483   for(i = 0; i < 15; i++)
484      vp->inputmap_rev[i] = 255;
485   free_inputs = 0x2ffd;
486
487/* fglrx uses fixed inputs as follows for conventional attribs.
488   generic attribs use non-fixed assignment, fglrx will always use the
489   lowest attrib values available. We'll just do the same.
490   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
491   and 13 in a hw vertex prog.
492   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
493   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
494   Additionally, not more than 12 arrays in total are possible I think.
495   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
496   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
497   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
498   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
499*/
500
/* attr 4,5 and 13 are only used with generic attribs.
   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
   not possible to use with vertex progs as it is lacking in vert prog specification) */
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
505   if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
506      vp->inputs[VERT_ATTRIB_POS] = 0;
507      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
508      free_inputs &= ~(1 << 0);
509      array_count++;
510   }
511   if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
512      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
513      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
514      array_count++;
515   }
516   if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
517      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
518      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
519      array_count++;
520   }
521   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
522      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
523      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
524      free_inputs &= ~(1 << 2);
525      array_count++;
526   }
527   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
528      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
529      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
530      free_inputs &= ~(1 << 3);
531      array_count++;
532   }
533   if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
534      vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
535      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
536      array_count++;
537   }
538   /* VERT_ATTRIB_TEX0-5 */
539   for (i = 0; i <= 5; i++) {
540      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
541	 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
542	 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
543	 free_inputs &= ~(1 << (i + 6));
544	 array_count++;
545      }
546   }
547   /* using VERT_ATTRIB_TEX6/7 would be illegal */
548   for (; i < VERT_ATTRIB_TEX_MAX; i++) {
549      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
550          if (R200_DEBUG & RADEON_FALLBACKS) {
551              fprintf(stderr, "texture attribute %d in vert prog\n", i);
552          }
553          return GL_FALSE;
554      }
555   }
556   /* completely ignore aliasing? */
557   for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
558      int j;
559   /* completely ignore aliasing? */
560      if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
561	 array_count++;
562	 if (array_count > 12) {
563	    if (R200_DEBUG & RADEON_FALLBACKS) {
564	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
565	    }
566	    return GL_FALSE;
567	 }
568	 for (j = 0; j < 14; j++) {
569	    /* will always find one due to limited array_count */
570	    if (free_inputs & (1 << j)) {
571	       free_inputs &= ~(1 << j);
572	       vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
573	       if (j == 0) {
574                  /* mapped to pos */
575                  vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
576	       } else if (j < 12) {
577                  /* mapped to col/tex */
578                  vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
579	       } else {
580                  /* mapped to pos1 */
581                  vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
582               }
583	       break;
584	    }
585	 }
586      }
587   }
588
589   if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
590      if (R200_DEBUG & RADEON_FALLBACKS) {
591	 fprintf(stderr, "can't handle vert prog without position output\n");
592      }
593      return GL_FALSE;
594   }
595   if (free_inputs & 1) {
596      if (R200_DEBUG & RADEON_FALLBACKS) {
597	 fprintf(stderr, "can't handle vert prog without position input\n");
598      }
599      return GL_FALSE;
600   }
601
602   o_inst = vp->instr;
603   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
604      operands = op_operands(vpi->Opcode);
605      are_srcs_scalar = operands & SCALAR_FLAG;
606      operands &= OP_MASK;
607
608      for(i = 0; i < operands; i++) {
609	 src[i] = vpi->SrcReg[i];
610	 /* hack up default attrib values as per spec as swizzling.
611	    normal, fog, secondary color. Crazy?
612	    May need more if we don't submit vec4 elements? */
613	 if (src[i].File == PROGRAM_INPUT) {
614	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
615	       int j;
616	       for (j = 0; j < 4; j++) {
617		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
618		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
619		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
620		  }
621	       }
622	    }
623	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
624	       int j;
625	       for (j = 0; j < 4; j++) {
626		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
627		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
628		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
629		  }
630	       }
631	    }
632	    else if (src[i].Index == VERT_ATTRIB_FOG) {
633	       int j;
634	       for (j = 0; j < 4; j++) {
635		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
636		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
637		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
638		  }
639		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
640			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
641		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
642		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
643		  }
644	       }
645	    }
646	 }
647      }
648
649      if(operands == 3){
650	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
651	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
652		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
653		VSF_FLAG_ALL);
654
655	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
656		  SWIZZLE_X, SWIZZLE_Y,
657		  SWIZZLE_Z, SWIZZLE_W,
658		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
659
660	    o_inst->src1 = ZERO_SRC_0;
661	    o_inst->src2 = UNUSED_SRC_1;
662	    o_inst++;
663
664	    src[2].File = PROGRAM_TEMPORARY;
665	    src[2].Index = u_temp_i;
666	    src[2].RelAddr = 0;
667	    u_temp_i--;
668	 }
669      }
670
671      if(operands >= 2){
672	 if( CMP_SRCS(src[1], src[0]) ){
673	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
674		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
675		VSF_FLAG_ALL);
676
677	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
678		  SWIZZLE_X, SWIZZLE_Y,
679		  SWIZZLE_Z, SWIZZLE_W,
680		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
681
682	    o_inst->src1 = ZERO_SRC_0;
683	    o_inst->src2 = UNUSED_SRC_1;
684	    o_inst++;
685
686	    src[0].File = PROGRAM_TEMPORARY;
687	    src[0].Index = u_temp_i;
688	    src[0].RelAddr = 0;
689	    u_temp_i--;
690	 }
691      }
692
693      dst = vpi->DstReg;
694      if (dst.File == PROGRAM_OUTPUT &&
695	  dst.Index == VERT_RESULT_FOGC &&
696	  dst.WriteMask & WRITEMASK_X) {
697	  fog_temp_i = u_temp_i;
698	  dst.File = PROGRAM_TEMPORARY;
699	  dst.Index = fog_temp_i;
700	  dofogfix = 1;
701	  u_temp_i--;
702      }
703
704      /* These ops need special handling. */
705      switch(vpi->Opcode){
706      case OPCODE_POW:
707/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
708   So may need to insert additional instruction */
709	 if ((src[0].File == src[1].File) &&
710	     (src[0].Index == src[1].Index)) {
711	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
712		   t_dst_mask(dst.WriteMask));
713	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
714		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
715		   SWIZZLE_ZERO,
716		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
717		   SWIZZLE_ZERO,
718		   t_src_class(src[0].File),
719		   src[0].Negate) | (src[0].RelAddr << 4);
720	    o_inst->src1 = UNUSED_SRC_0;
721	    o_inst->src2 = UNUSED_SRC_0;
722	 }
723	 else {
724	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
725		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
726		   VSF_FLAG_ALL);
727	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
728		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
729		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
730		   t_src_class(src[0].File),
731		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
732	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
733		   SWIZZLE_ZERO, SWIZZLE_ZERO,
734		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
735		   t_src_class(src[1].File),
736		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
737	    o_inst->src2 = UNUSED_SRC_1;
738	    o_inst++;
739
740	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
741		   t_dst_mask(dst.WriteMask));
742	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
743		   VSF_IN_COMPONENT_X,
744		   VSF_IN_COMPONENT_Y,
745		   VSF_IN_COMPONENT_Z,
746		   VSF_IN_COMPONENT_W,
747		   VSF_IN_CLASS_TMP,
748		   VSF_FLAG_NONE);
749	    o_inst->src1 = UNUSED_SRC_0;
750	    o_inst->src2 = UNUSED_SRC_0;
751	    u_temp_i--;
752	 }
753	 goto next;
754
755      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
756      case OPCODE_SWZ:
757	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
758		t_dst_mask(dst.WriteMask));
759	 o_inst->src0 = t_src(vp, &src[0]);
760	 o_inst->src1 = ZERO_SRC_0;
761	 o_inst->src2 = UNUSED_SRC_1;
762	 goto next;
763
764      case OPCODE_MAD:
765	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
766	    instead (requiring 2 clocks) if all inputs are in temp memory
767	    (and, only if they actually reference 3 distinct temps) */
768	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
769	    src[1].File == PROGRAM_TEMPORARY &&
770	    src[2].File == PROGRAM_TEMPORARY &&
771	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
772	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
773	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
774	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
775
776	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
777	    t_dst_mask(dst.WriteMask));
778	 o_inst->src0 = t_src(vp, &src[0]);
779#if 0
780if ((o_inst - vp->instr) == 31) {
781/* fix up the broken vertex program of quake4 demo... */
782o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
783			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
784			t_src_class(src[1].File),
785			src[1].Negate) | (src[1].RelAddr << 4);
786o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
787			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
788			t_src_class(src[1].File),
789			src[1].Negate) | (src[1].RelAddr << 4);
790}
791else {
792	 o_inst->src1 = t_src(vp, &src[1]);
793	 o_inst->src2 = t_src(vp, &src[2]);
794}
795#else
796	 o_inst->src1 = t_src(vp, &src[1]);
797	 o_inst->src2 = t_src(vp, &src[2]);
798#endif
799	 goto next;
800
801      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
802	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
803		t_dst_mask(dst.WriteMask));
804
805	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
806		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
807		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
808		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
809		SWIZZLE_ZERO,
810		t_src_class(src[0].File),
811		src[0].Negate) | (src[0].RelAddr << 4);
812
813	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
814		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
815		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
816		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
817		SWIZZLE_ZERO,
818		t_src_class(src[1].File),
819		src[1].Negate) | (src[1].RelAddr << 4);
820
821	 o_inst->src2 = UNUSED_SRC_1;
822	 goto next;
823
824      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
825	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
826		t_dst_mask(dst.WriteMask));
827
828	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
829		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
830		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
831		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
832		VSF_IN_COMPONENT_ONE,
833		t_src_class(src[0].File),
834		src[0].Negate) | (src[0].RelAddr << 4);
835	 o_inst->src1 = t_src(vp, &src[1]);
836	 o_inst->src2 = UNUSED_SRC_1;
837	 goto next;
838
839      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
840	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
841		t_dst_mask(dst.WriteMask));
842
843	 o_inst->src0 = t_src(vp, &src[0]);
844	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
845		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
846		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
847		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
848		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
849		t_src_class(src[1].File),
850		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
851	 o_inst->src2 = UNUSED_SRC_1;
852	 goto next;
853
854      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
855	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
856		t_dst_mask(dst.WriteMask));
857
858	 o_inst->src0=t_src(vp, &src[0]);
859	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
860		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
861		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
862		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
863		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
864		t_src_class(src[0].File),
865		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
866	 o_inst->src2 = UNUSED_SRC_1;
867	 goto next;
868
869      case OPCODE_FLR:
870      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
871         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
872
873	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
874	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
875	    t_dst_mask(dst.WriteMask));
876
877	 o_inst->src0 = t_src(vp, &src[0]);
878	 o_inst->src1 = UNUSED_SRC_0;
879	 o_inst->src2 = UNUSED_SRC_1;
880	 o_inst++;
881
882	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
883		t_dst_mask(dst.WriteMask));
884
885	 o_inst->src0 = t_src(vp, &src[0]);
886	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
887		VSF_IN_COMPONENT_X,
888		VSF_IN_COMPONENT_Y,
889		VSF_IN_COMPONENT_Z,
890		VSF_IN_COMPONENT_W,
891		VSF_IN_CLASS_TMP,
892		/* Not 100% sure about this */
893		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
894
895	 o_inst->src2 = UNUSED_SRC_0;
896	 u_temp_i--;
897	 goto next;
898
899      case OPCODE_XPD:
900	 /* mul r0, r1.yzxw, r2.zxyw
901	    mad r0, -r2.yzxw, r1.zxyw, r0
902	  */
903	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
904	    src[1].File == PROGRAM_TEMPORARY &&
905	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
906	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
907
908	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
909	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
910	    t_dst_mask(dst.WriteMask));
911
912	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
913		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
914		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
915		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
916		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
917		t_src_class(src[0].File),
918		src[0].Negate) | (src[0].RelAddr << 4);
919
920	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
921		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
922		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
923		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
924		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
925		t_src_class(src[1].File),
926		src[1].Negate) | (src[1].RelAddr << 4);
927
928	 o_inst->src2 = UNUSED_SRC_1;
929	 o_inst++;
930	 u_temp_i--;
931
932	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
933		t_dst_mask(dst.WriteMask));
934
935	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
936		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
937		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
938		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
939		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
940		t_src_class(src[1].File),
941		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
942
943	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
944		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
945		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
946		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
947		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
948		t_src_class(src[0].File),
949		src[0].Negate) | (src[0].RelAddr << 4);
950
951	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
952		VSF_IN_COMPONENT_X,
953		VSF_IN_COMPONENT_Y,
954		VSF_IN_COMPONENT_Z,
955		VSF_IN_COMPONENT_W,
956		VSF_IN_CLASS_TMP,
957		VSF_FLAG_NONE);
958	 goto next;
959
960      case OPCODE_END:
961	 assert(0);
962      default:
963	 break;
964      }
965
966      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
967	    t_dst_mask(dst.WriteMask));
968
969      if(are_srcs_scalar){
970	 switch(operands){
971	    case 1:
972		o_inst->src0 = t_src_scalar(vp, &src[0]);
973		o_inst->src1 = UNUSED_SRC_0;
974		o_inst->src2 = UNUSED_SRC_1;
975	    break;
976
977	    case 2:
978		o_inst->src0 = t_src_scalar(vp, &src[0]);
979		o_inst->src1 = t_src_scalar(vp, &src[1]);
980		o_inst->src2 = UNUSED_SRC_1;
981	    break;
982
983	    case 3:
984		o_inst->src0 = t_src_scalar(vp, &src[0]);
985		o_inst->src1 = t_src_scalar(vp, &src[1]);
986		o_inst->src2 = t_src_scalar(vp, &src[2]);
987	    break;
988
989	    default:
990		fprintf(stderr, "illegal number of operands %lu\n", operands);
991		exit(-1);
992	    break;
993	 }
994      } else {
995	 switch(operands){
996	    case 1:
997		o_inst->src0 = t_src(vp, &src[0]);
998		o_inst->src1 = UNUSED_SRC_0;
999		o_inst->src2 = UNUSED_SRC_1;
1000	    break;
1001
1002	    case 2:
1003		o_inst->src0 = t_src(vp, &src[0]);
1004		o_inst->src1 = t_src(vp, &src[1]);
1005		o_inst->src2 = UNUSED_SRC_1;
1006	    break;
1007
1008	    case 3:
1009		o_inst->src0 = t_src(vp, &src[0]);
1010		o_inst->src1 = t_src(vp, &src[1]);
1011		o_inst->src2 = t_src(vp, &src[2]);
1012	    break;
1013
1014	    default:
1015		fprintf(stderr, "illegal number of operands %lu\n", operands);
1016		exit(-1);
1017	    break;
1018	 }
1019      }
1020      next:
1021
1022      if (dofogfix) {
1023	 o_inst++;
1024	 if (vp->fogmode == GL_EXP) {
1025	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1026		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1027		VSF_FLAG_X);
1028	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1029	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1030	    o_inst->src2 = UNUSED_SRC_1;
1031	    o_inst++;
1032	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1033		R200_VSF_OUT_CLASS_RESULT_FOGC,
1034		VSF_FLAG_X);
1035	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1036	    o_inst->src1 = UNUSED_SRC_0;
1037	    o_inst->src2 = UNUSED_SRC_1;
1038	 }
1039	 else if (vp->fogmode == GL_EXP2) {
1040	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1041		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1042		VSF_FLAG_X);
1043	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1044	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1045	    o_inst->src2 = UNUSED_SRC_1;
1046	    o_inst++;
1047	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1048		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1049		VSF_FLAG_X);
1050	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1051	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1052	    o_inst->src2 = UNUSED_SRC_1;
1053	    o_inst++;
1054	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1055		R200_VSF_OUT_CLASS_RESULT_FOGC,
1056		VSF_FLAG_X);
1057	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1058	    o_inst->src1 = UNUSED_SRC_0;
1059	    o_inst->src2 = UNUSED_SRC_1;
1060	 }
1061	 else { /* fogmode == GL_LINEAR */
1062		/* could do that with single op (dot) if using params like
1063		   with fixed function pipeline fog */
1064	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1065		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1066		VSF_FLAG_X);
1067	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1068	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1069	    o_inst->src2 = UNUSED_SRC_1;
1070	    o_inst++;
1071	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1072		R200_VSF_OUT_CLASS_RESULT_FOGC,
1073		VSF_FLAG_X);
1074	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1075	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1076	    o_inst->src2 = UNUSED_SRC_1;
1077
1078	 }
1079         dofogfix = 0;
1080      }
1081
1082      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1083      if (mesa_vp->Base.NumNativeTemporaries <
1084	 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
1085	 mesa_vp->Base.NumNativeTemporaries =
1086	    mesa_vp->Base.NumTemporaries + u_temp_used;
1087      }
1088      if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1089	 if (R200_DEBUG & RADEON_FALLBACKS) {
1090	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
1091	 }
1092	 return GL_FALSE;
1093      }
1094      u_temp_i = R200_VSF_MAX_TEMPS - 1;
1095      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1096	 mesa_vp->Base.NumNativeInstructions = 129;
1097	 if (R200_DEBUG & RADEON_FALLBACKS) {
1098	    fprintf(stderr, "more than 128 native instructions\n");
1099	 }
1100	 return GL_FALSE;
1101      }
1102      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1103	 vp->pos_end = (o_inst - vp->instr);
1104      }
1105   }
1106
1107   vp->native = GL_TRUE;
1108   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1109#if 0
1110   fprintf(stderr, "hw program:\n");
1111   for(i=0; i < vp->program.length; i++)
1112      fprintf(stderr, "%08x\n", vp->instr[i]);
1113#endif
1114   return GL_TRUE;
1115}
1116
1117void r200SetupVertexProg( struct gl_context *ctx ) {
1118   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1119   struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1120   GLboolean fallback;
1121   GLint i;
1122
1123   if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1124      rmesa->curr_vp_hw = NULL;
1125      r200_translate_vertex_program(ctx, vp);
1126   }
1127   /* could optimize setting up vertex progs away for non-tcl hw */
1128   fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1129   TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1130   if (rmesa->radeon.TclFallback) return;
1131
1132   R200_STATECHANGE( rmesa, vap );
1133   /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1134             maybe only when using more than 64 inst / 96 param? */
1135   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1136
1137   R200_STATECHANGE( rmesa, pvs );
1138
1139   rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1140      ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1141      (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1142   rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1143      (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1144
1145   /* maybe user clip planes just work with vertex progs... untested */
1146   if (ctx->Transform.ClipPlanesEnabled) {
1147      R200_STATECHANGE( rmesa, tcl );
1148      if (vp->mesa_program.IsPositionInvariant) {
1149	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1150      }
1151      else {
1152	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1153      }
1154   }
1155
1156   if (vp != rmesa->curr_vp_hw) {
1157      GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1158      drm_radeon_cmd_header_t tmp;
1159
1160      R200_STATECHANGE( rmesa, vpi[0] );
1161      R200_STATECHANGE( rmesa, vpi[1] );
1162
1163      /* FIXME: what about using a memcopy... */
1164      for (i = 0; (i < 64) && i < count; i++) {
1165	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1166	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1167	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1168	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1169      }
1170      /* hack up the cmd_size so not the whole state atom is emitted always.
1171         This may require some more thought, we may emit half progs on lost state, but
1172         hopefully it won't matter?
1173         WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1174         packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1175      rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1176      tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1177      tmp.veclinear.count = (count > 64) ? 64 : count;
1178      rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1179      if (count > 64) {
1180	 for (i = 0; i < (count - 64); i++) {
1181	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1182	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1183	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1184	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1185	 }
1186	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1187	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1188	 tmp.veclinear.count = count - 64;
1189	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1190      }
1191      rmesa->curr_vp_hw = vp;
1192   }
1193}
1194
1195
1196static void
1197r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1198{
1199   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1200
1201   switch(target){
1202   case GL_VERTEX_PROGRAM_ARB:
1203      rmesa->curr_vp_hw = NULL;
1204      break;
1205   default:
1206      _mesa_problem(ctx, "Target not supported yet!");
1207      break;
1208   }
1209}
1210
1211static struct gl_program *
1212r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id)
1213{
1214   struct r200_vertex_program *vp;
1215
1216   switch(target){
1217   case GL_VERTEX_PROGRAM_ARB:
1218      vp = CALLOC_STRUCT(r200_vertex_program);
1219      return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1220   case GL_FRAGMENT_PROGRAM_ARB:
1221   case GL_FRAGMENT_PROGRAM_NV:
1222      return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1223   default:
1224      _mesa_problem(ctx, "Bad target in r200NewProgram");
1225   }
1226   return NULL;
1227}
1228
1229
/**
 * DeleteProgram driver hook.
 *
 * No driver-specific teardown is done here; the program is simply passed to
 * _mesa_delete_program().
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1235
1236static GLboolean
1237r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1238{
1239   struct r200_vertex_program *vp = (void *)prog;
1240   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1241
1242   switch(target) {
1243   case GL_VERTEX_PROGRAM_ARB:
1244      vp->translated = GL_FALSE;
1245      vp->fogpidx = 0;
1246/*      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1247      r200_translate_vertex_program(ctx, vp);
1248      rmesa->curr_vp_hw = NULL;
1249      break;
1250   case GL_FRAGMENT_SHADER_ATI:
1251      rmesa->afs_loaded = NULL;
1252      break;
1253   }
1254   /* need this for tcl fallbacks */
1255   (void) _tnl_program_string(ctx, target, prog);
1256
1257   /* XXX check if program is legal, within limits */
1258   return GL_TRUE;
1259}
1260
1261static GLboolean
1262r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1263{
1264   struct r200_vertex_program *vp = (void *)prog;
1265
1266   switch(target){
1267   case GL_VERTEX_STATE_PROGRAM_NV:
1268   case GL_VERTEX_PROGRAM_ARB:
1269      if (!vp->translated) {
1270	 r200_translate_vertex_program(ctx, vp);
1271      }
1272     /* does not take parameters etc. into account */
1273      return vp->native;
1274   default:
1275      _mesa_problem(ctx, "Bad target in r200NewProgram");
1276   }
1277   return 0;
1278}
1279
1280void r200InitShaderFuncs(struct dd_function_table *functions)
1281{
1282   functions->NewProgram = r200NewProgram;
1283   functions->BindProgram = r200BindProgram;
1284   functions->DeleteProgram = r200DeleteProgram;
1285   functions->ProgramStringNotify = r200ProgramStringNotify;
1286   functions->IsProgramNative = r200IsProgramNative;
1287}
1288