1/**************************************************************************
2
3Copyright (C) 2005 Aapo Tahkola.
4
5All Rights Reserved.
6
7Permission is hereby granted, free of charge, to any person obtaining a
8copy of this software and associated documentation files (the "Software"),
9to deal in the Software without restriction, including without limitation
10on the rights to use, copy, modify, merge, publish, distribute, sub
11license, and/or sell copies of the Software, and to permit persons to whom
12the Software is furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice (including the next
15paragraph) shall be included in all copies or substantial portions of the
16Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26**************************************************************************/
27
28/*
29 * Authors:
30 *   Aapo Tahkola <aet@rasterburn.org>
31 *   Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36#include "program/program.h"
37#include "program/prog_instruction.h"
38#include "program/prog_parameter.h"
39#include "program/prog_statevars.h"
40#include "program/programopt.h"
41#include "tnl/tnl.h"
42
43#include "r200_context.h"
44#include "r200_vertprog.h"
45#include "r200_ioctl.h"
46#include "r200_tcl.h"
47
48#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51    SWIZZLE_W != VSF_IN_COMPONENT_W || \
52    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54    WRITEMASK_X != VSF_FLAG_X || \
55    WRITEMASK_Y != VSF_FLAG_Y || \
56    WRITEMASK_Z != VSF_FLAG_Z || \
57    WRITEMASK_W != VSF_FLAG_W
58#error Cannot change these!
59#endif
60
61#define SCALAR_FLAG (1<<31)
62#define FLAG_MASK (1<<31)
63#define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
64#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66static struct{
67   char *name;
68   int opcode;
69   unsigned long ip; /* number of input operands and flags */
70}op_names[]={
71   OPN(ABS, 1),
72   OPN(ADD, 2),
73   OPN(ARL, 1|SCALAR_FLAG),
74   OPN(DP3, 2),
75   OPN(DP4, 2),
76   OPN(DPH, 2),
77   OPN(DST, 2),
78   OPN(EX2, 1|SCALAR_FLAG),
79   OPN(EXP, 1|SCALAR_FLAG),
80   OPN(FLR, 1),
81   OPN(FRC, 1),
82   OPN(LG2, 1|SCALAR_FLAG),
83   OPN(LIT, 1),
84   OPN(LOG, 1|SCALAR_FLAG),
85   OPN(MAD, 3),
86   OPN(MAX, 2),
87   OPN(MIN, 2),
88   OPN(MOV, 1),
89   OPN(MUL, 2),
90   OPN(POW, 2|SCALAR_FLAG),
91   OPN(RCP, 1|SCALAR_FLAG),
92   OPN(RSQ, 1|SCALAR_FLAG),
93   OPN(SGE, 2),
94   OPN(SLT, 2),
95   OPN(SUB, 2),
96   OPN(SWZ, 1),
97   OPN(XPD, 2),
98   OPN(END, 0),
99};
100#undef OPN
101
102static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
103{
104   r200ContextPtr rmesa = R200_CONTEXT( ctx );
105   GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
106   int pi;
107   struct gl_program *mesa_vp = &vp->mesa_program;
108   struct gl_program_parameter_list *paramList;
109   drm_radeon_cmd_header_t tmp;
110
111   R200_STATECHANGE( rmesa, vpp[0] );
112   R200_STATECHANGE( rmesa, vpp[1] );
113   assert(mesa_vp->Parameters);
114   _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
115   paramList = mesa_vp->Parameters;
116
117   if(paramList->NumParameters > R200_VSF_MAX_PARAM){
118      fprintf(stderr, "%s:Params exhausted\n", __func__);
119      return GL_FALSE;
120   }
121
122   for(pi = 0; pi < paramList->NumParameters; pi++) {
123      switch(paramList->Parameters[pi].Type) {
124      case PROGRAM_STATE_VAR:
125      //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
126      case PROGRAM_CONSTANT:
127	 *fcmd++ = paramList->ParameterValues[pi][0].f;
128	 *fcmd++ = paramList->ParameterValues[pi][1].f;
129	 *fcmd++ = paramList->ParameterValues[pi][2].f;
130	 *fcmd++ = paramList->ParameterValues[pi][3].f;
131	 break;
132      default:
133	 _mesa_problem(NULL, "Bad param type in %s", __func__);
134	 break;
135      }
136      if (pi == 95) {
137	 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
138      }
139   }
140   /* hack up the cmd_size so not the whole state atom is emitted always. */
141   rmesa->hw.vpp[0].cmd_size =
142      1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
143   tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
144   tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
145   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
146   if (paramList->NumParameters > 96) {
147      rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
148      tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
149      tmp.veclinear.count = paramList->NumParameters - 96;
150      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
151   }
152   return GL_TRUE;
153}
154
155static inline unsigned long t_dst_mask(GLuint mask)
156{
157   /* WRITEMASK_* is equivalent to VSF_FLAG_* */
158   return mask & VSF_FLAG_ALL;
159}
160
161static unsigned long t_dst(struct prog_dst_register *dst)
162{
163   switch(dst->File) {
164   case PROGRAM_TEMPORARY:
165      return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
166	 | R200_VSF_OUT_CLASS_TMP);
167   case PROGRAM_OUTPUT:
168      switch (dst->Index) {
169      case VARYING_SLOT_POS:
170	 return R200_VSF_OUT_CLASS_RESULT_POS;
171      case VARYING_SLOT_COL0:
172	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
173      case VARYING_SLOT_COL1:
174	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
175	    | R200_VSF_OUT_CLASS_RESULT_COLOR);
176      case VARYING_SLOT_FOGC:
177	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
178      case VARYING_SLOT_TEX0:
179      case VARYING_SLOT_TEX1:
180      case VARYING_SLOT_TEX2:
181      case VARYING_SLOT_TEX3:
182      case VARYING_SLOT_TEX4:
183      case VARYING_SLOT_TEX5:
184	 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
185	    | R200_VSF_OUT_CLASS_RESULT_TEXC);
186      case VARYING_SLOT_PSIZ:
187	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
188      default:
189	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index);
190	 exit(0);
191	 return 0;
192      }
193   case PROGRAM_ADDRESS:
194      assert (dst->Index == 0);
195      return R200_VSF_OUT_CLASS_ADDR;
196   default:
197      fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, dst->File);
198      exit(0);
199      return 0;
200   }
201}
202
203static unsigned long t_src_class(gl_register_file file)
204{
205
206   switch(file){
207   case PROGRAM_TEMPORARY:
208      return VSF_IN_CLASS_TMP;
209
210   case PROGRAM_INPUT:
211      return VSF_IN_CLASS_ATTR;
212
213   case PROGRAM_CONSTANT:
214   case PROGRAM_STATE_VAR:
215      return VSF_IN_CLASS_PARAM;
216   /*
217   case PROGRAM_OUTPUT:
218   case PROGRAM_ADDRESS:
219   */
220   default:
221      fprintf(stderr, "problem in %s", __func__);
222      exit(0);
223   }
224}
225
226static inline unsigned long t_swizzle(GLubyte swizzle)
227{
228/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
229   return swizzle;
230}
231
#if 0
/* Debug aid (currently compiled out): print every entry of vp->inputs[] to
   stderr on one line, prefixed with the caller's name. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
249
250static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
251{
252/*
253   int i;
254   int max_reg = -1;
255*/
256   if(src->File == PROGRAM_INPUT){
257/*      if(vp->inputs[src->Index] != -1)
258	 return vp->inputs[src->Index];
259
260      for(i=0; i < VERT_ATTRIB_MAX; i++)
261	 if(vp->inputs[i] > max_reg)
262	    max_reg = vp->inputs[i];
263
264      vp->inputs[src->Index] = max_reg+1;*/
265
266      //vp_dump_inputs(vp, __func__);
267      assert(vp->inputs[src->Index] != -1);
268      return vp->inputs[src->Index];
269   } else {
270      if (src->Index < 0) {
271	 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
272	 return 0;
273      }
274      return src->Index;
275   }
276}
277
278static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
279{
280
281   return MAKE_VSF_SOURCE(t_src_index(vp, src),
282			t_swizzle(GET_SWZ(src->Swizzle, 0)),
283			t_swizzle(GET_SWZ(src->Swizzle, 1)),
284			t_swizzle(GET_SWZ(src->Swizzle, 2)),
285			t_swizzle(GET_SWZ(src->Swizzle, 3)),
286			t_src_class(src->File),
287			src->Negate) | (src->RelAddr << 4);
288}
289
290static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
291{
292
293   return MAKE_VSF_SOURCE(t_src_index(vp, src),
294			t_swizzle(GET_SWZ(src->Swizzle, 0)),
295			t_swizzle(GET_SWZ(src->Swizzle, 0)),
296			t_swizzle(GET_SWZ(src->Swizzle, 0)),
297			t_swizzle(GET_SWZ(src->Swizzle, 0)),
298			t_src_class(src->File),
299			src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
300}
301
302static unsigned long t_opcode(enum prog_opcode opcode)
303{
304
305   switch(opcode){
306   case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
307   /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
308    * seems to ignore neg offsets which isn't quite correct...
309    */
310   case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
311   case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
312   case OPCODE_DST: return R200_VPI_OUT_OP_DST;
313   case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
314   case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
315   case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
316   case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
317   case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
318   case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
319   case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
320   case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
321   case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
322   case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
323   case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
324   case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
325   case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
326
327   default:
328      fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode);
329   }
330   exit(-1);
331   return 0;
332}
333
334static unsigned long op_operands(enum prog_opcode opcode)
335{
336   int i;
337
338   /* Can we trust mesas opcodes to be in order ? */
339   for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
340      if(op_names[i].opcode == opcode)
341	 return op_names[i].ip;
342
343   fprintf(stderr, "op %d not found in op_names\n", opcode);
344   exit(-1);
345   return 0;
346}
347
/* TODO: Get rid of t_src_class call */
/* True when sources a and b name different registers (their Index or their
   RelAddr flag differs) while both are of class PARAM or both are of class
   ATTR.  The translator copies one of the two into a temporary when this
   holds.  Both arguments are evaluated several times, so they must be free
   of side effects; they are parenthesized so that any lvalue expression
   (e.g. *ptr) can be passed. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 encodes unused sources as follows:
   sources that are unused but still required repeat the previous source with
   its swizzle zeroed out.
   sources that are not required at all repeat the previous source but with
   VSF_IN_CLASS_NONE set.
   E.g. an add (2 args) used as a mov has its 2nd arg zeroed out and its 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if this is strictly necessary. */
360
/* Simplified source helpers. They read o_inst->srcN, so they must only be
   used on a source that has already been set up.
   They are NOT semantically equivalent to the r300 ones; sharing code with
   r300 requires changes. */

/* swizzle field selecting ZERO for all four components */
#define ZERO_SWIZZLE_XYZW ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
			  | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
			  | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
			  | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* reg with its 12 swizzle bits replaced by the all-ZERO swizzle */
#define ZERO_SRC_OF(reg) ((((reg) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ZERO_SWIZZLE_XYZW))

/* reg with its low four bits replaced by 9
   (presumably VSF_IN_CLASS_NONE - TODO confirm against the register header) */
#define UNUSED_SRC_OF(reg) (((reg) & ~15) | 9)

#define ZERO_SRC_0 ZERO_SRC_OF(o_inst->src0)
#define ZERO_SRC_1 ZERO_SRC_OF(o_inst->src1)
#define ZERO_SRC_2 ZERO_SRC_OF(o_inst->src2)

#define UNUSED_SRC_0 UNUSED_SRC_OF(o_inst->src0)
#define UNUSED_SRC_1 UNUSED_SRC_OF(o_inst->src1)
#define UNUSED_SRC_2 UNUSED_SRC_OF(o_inst->src2)
386
387
388/**
389 * Generate an R200 vertex program from Mesa's internal representation.
390 *
391 * \return  GL_TRUE for success, GL_FALSE for failure.
392 */
393static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
394{
395   struct gl_program *mesa_vp = &vp->mesa_program;
396   struct prog_instruction *vpi;
397   int i;
398   VERTEX_SHADER_INSTRUCTION *o_inst;
399   unsigned long operands;
400   int are_srcs_scalar;
401   unsigned long hw_op;
402   int dofogfix = 0;
403   int fog_temp_i = 0;
404   int free_inputs;
405   int array_count = 0;
406   int u_temp_used;
407
408   vp->native = GL_FALSE;
409   vp->translated = GL_TRUE;
410   vp->fogmode = ctx->Fog.Mode;
411
412   if (mesa_vp->arb.NumInstructions == 0)
413      return GL_FALSE;
414
415#if 0
416   if ((mesa_vp->info.inputs_read &
417      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
418      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
419      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
420      if (R200_DEBUG & RADEON_FALLBACKS) {
421	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
422	    mesa_vp->info.inputs_read);
423      }
424      return GL_FALSE;
425   }
426#endif
427
428   if ((mesa_vp->info.outputs_written &
429      ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
430      (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
431      (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
432      (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
433      if (R200_DEBUG & RADEON_FALLBACKS) {
434	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
435                 (unsigned long long) mesa_vp->info.outputs_written);
436      }
437      return GL_FALSE;
438   }
439
440   /* Initial value should be last tmp reg that hw supports.
441      Strangely enough r300 doesnt mind even though these would be out of range.
442      Smart enough to realize that it doesnt need it? */
443   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
444   struct prog_src_register src[3];
445   struct prog_dst_register dst;
446
447/* FIXME: is changing the prog safe to do here? */
448   if (mesa_vp->arb.IsPositionInvariant &&
449      /* make sure we only do this once */
450       !(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
451	 _mesa_insert_mvp_code(ctx, mesa_vp);
452      }
453
454   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
455      base e isn't directly available neither. */
456   if ((mesa_vp->info.outputs_written & (1 << VARYING_SLOT_FOGC)) &&
457       !vp->fogpidx) {
458      struct gl_program_parameter_list *paramList;
459      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
460      paramList = mesa_vp->Parameters;
461      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
462   }
463
464   vp->pos_end = 0;
465   mesa_vp->arb.NumNativeInstructions = 0;
466   if (mesa_vp->Parameters)
467      mesa_vp->arb.NumNativeParameters = mesa_vp->Parameters->NumParameters;
468   else
469      mesa_vp->arb.NumNativeParameters = 0;
470
471   for(i = 0; i < VERT_ATTRIB_MAX; i++)
472      vp->inputs[i] = -1;
473   for(i = 0; i < 15; i++)
474      vp->inputmap_rev[i] = 255;
475   free_inputs = 0x2ffd;
476
477/* fglrx uses fixed inputs as follows for conventional attribs.
478   generic attribs use non-fixed assignment, fglrx will always use the
479   lowest attrib values available. We'll just do the same.
480   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
481   and 13 in a hw vertex prog.
482   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
483   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
484   Additionally, not more than 12 arrays in total are possible I think.
485   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
486   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
487   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
488   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
489*/
490
/* attr 4, 5 and 13 are only used with generic attribs.
   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
   not possible to use with vertex progs as it is missing from the vertex program specification) */
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
495   if (mesa_vp->info.inputs_read & VERT_BIT_POS) {
496      vp->inputs[VERT_ATTRIB_POS] = 0;
497      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
498      free_inputs &= ~(1 << 0);
499      array_count++;
500   }
501   if (mesa_vp->info.inputs_read & VERT_BIT_WEIGHT) {
502      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
503      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
504      array_count++;
505   }
506   if (mesa_vp->info.inputs_read & VERT_BIT_NORMAL) {
507      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
508      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
509      array_count++;
510   }
511   if (mesa_vp->info.inputs_read & VERT_BIT_COLOR0) {
512      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
513      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
514      free_inputs &= ~(1 << 2);
515      array_count++;
516   }
517   if (mesa_vp->info.inputs_read & VERT_BIT_COLOR1) {
518      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
519      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
520      free_inputs &= ~(1 << 3);
521      array_count++;
522   }
523   if (mesa_vp->info.inputs_read & VERT_BIT_FOG) {
524      vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
525      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
526      array_count++;
527   }
528   /* VERT_ATTRIB_TEX0-5 */
529   for (i = 0; i <= 5; i++) {
530      if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
531	 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
532	 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
533	 free_inputs &= ~(1 << (i + 6));
534	 array_count++;
535      }
536   }
537   /* using VERT_ATTRIB_TEX6/7 would be illegal */
538   for (; i < VERT_ATTRIB_TEX_MAX; i++) {
539      if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
540          if (R200_DEBUG & RADEON_FALLBACKS) {
541              fprintf(stderr, "texture attribute %d in vert prog\n", i);
542          }
543          return GL_FALSE;
544      }
545   }
546   /* completely ignore aliasing? */
547   for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
548      int j;
549   /* completely ignore aliasing? */
550      if (mesa_vp->info.inputs_read & VERT_BIT_GENERIC(i)) {
551	 array_count++;
552	 if (array_count > 12) {
553	    if (R200_DEBUG & RADEON_FALLBACKS) {
554	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
555	    }
556	    return GL_FALSE;
557	 }
558	 for (j = 0; j < 14; j++) {
559	    /* will always find one due to limited array_count */
560	    if (free_inputs & (1 << j)) {
561	       free_inputs &= ~(1 << j);
562	       vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
563	       if (j == 0) {
564                  /* mapped to pos */
565                  vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
566	       } else if (j < 12) {
567                  /* mapped to col/tex */
568                  vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
569	       } else {
570                  /* mapped to pos1 */
571                  vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
572               }
573	       break;
574	    }
575	 }
576      }
577   }
578
579   if (!(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
580      if (R200_DEBUG & RADEON_FALLBACKS) {
581	 fprintf(stderr, "can't handle vert prog without position output\n");
582      }
583      return GL_FALSE;
584   }
585   if (free_inputs & 1) {
586      if (R200_DEBUG & RADEON_FALLBACKS) {
587	 fprintf(stderr, "can't handle vert prog without position input\n");
588      }
589      return GL_FALSE;
590   }
591
592   o_inst = vp->instr;
593   for (vpi = mesa_vp->arb.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
594      operands = op_operands(vpi->Opcode);
595      are_srcs_scalar = operands & SCALAR_FLAG;
596      operands &= OP_MASK;
597
598      for(i = 0; i < operands; i++) {
599	 src[i] = vpi->SrcReg[i];
600	 /* hack up default attrib values as per spec as swizzling.
601	    normal, fog, secondary color. Crazy?
602	    May need more if we don't submit vec4 elements? */
603	 if (src[i].File == PROGRAM_INPUT) {
604	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
605	       int j;
606	       for (j = 0; j < 4; j++) {
607		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
608		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
609		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
610		  }
611	       }
612	    }
613	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
614	       int j;
615	       for (j = 0; j < 4; j++) {
616		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
617		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
618		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
619		  }
620	       }
621	    }
622	    else if (src[i].Index == VERT_ATTRIB_FOG) {
623	       int j;
624	       for (j = 0; j < 4; j++) {
625		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
626		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
627		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
628		  }
629		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
630			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
631		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
632		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
633		  }
634	       }
635	    }
636	 }
637      }
638
639      if(operands == 3){
640	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
641	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
642		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
643		VSF_FLAG_ALL);
644
645	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
646		  SWIZZLE_X, SWIZZLE_Y,
647		  SWIZZLE_Z, SWIZZLE_W,
648		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
649
650	    o_inst->src1 = ZERO_SRC_0;
651	    o_inst->src2 = UNUSED_SRC_1;
652	    o_inst++;
653
654	    src[2].File = PROGRAM_TEMPORARY;
655	    src[2].Index = u_temp_i;
656	    src[2].RelAddr = 0;
657	    u_temp_i--;
658	 }
659      }
660
661      if(operands >= 2){
662	 if( CMP_SRCS(src[1], src[0]) ){
663	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
664		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
665		VSF_FLAG_ALL);
666
667	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
668		  SWIZZLE_X, SWIZZLE_Y,
669		  SWIZZLE_Z, SWIZZLE_W,
670		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
671
672	    o_inst->src1 = ZERO_SRC_0;
673	    o_inst->src2 = UNUSED_SRC_1;
674	    o_inst++;
675
676	    src[0].File = PROGRAM_TEMPORARY;
677	    src[0].Index = u_temp_i;
678	    src[0].RelAddr = 0;
679	    u_temp_i--;
680	 }
681      }
682
683      dst = vpi->DstReg;
684      if (dst.File == PROGRAM_OUTPUT &&
685	  dst.Index == VARYING_SLOT_FOGC &&
686	  dst.WriteMask & WRITEMASK_X) {
687	  fog_temp_i = u_temp_i;
688	  dst.File = PROGRAM_TEMPORARY;
689	  dst.Index = fog_temp_i;
690	  dofogfix = 1;
691	  u_temp_i--;
692      }
693
694      /* These ops need special handling. */
695      switch(vpi->Opcode){
696      case OPCODE_POW:
697/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
698   So may need to insert additional instruction */
699	 if ((src[0].File == src[1].File) &&
700	     (src[0].Index == src[1].Index)) {
701	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
702		   t_dst_mask(dst.WriteMask));
703	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
704		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
705		   SWIZZLE_ZERO,
706		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
707		   SWIZZLE_ZERO,
708		   t_src_class(src[0].File),
709		   src[0].Negate) | (src[0].RelAddr << 4);
710	    o_inst->src1 = UNUSED_SRC_0;
711	    o_inst->src2 = UNUSED_SRC_0;
712	 }
713	 else {
714	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
715		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
716		   VSF_FLAG_ALL);
717	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
718		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
719		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
720		   t_src_class(src[0].File),
721		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
722	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
723		   SWIZZLE_ZERO, SWIZZLE_ZERO,
724		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
725		   t_src_class(src[1].File),
726		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
727	    o_inst->src2 = UNUSED_SRC_1;
728	    o_inst++;
729
730	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
731		   t_dst_mask(dst.WriteMask));
732	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
733		   VSF_IN_COMPONENT_X,
734		   VSF_IN_COMPONENT_Y,
735		   VSF_IN_COMPONENT_Z,
736		   VSF_IN_COMPONENT_W,
737		   VSF_IN_CLASS_TMP,
738		   VSF_FLAG_NONE);
739	    o_inst->src1 = UNUSED_SRC_0;
740	    o_inst->src2 = UNUSED_SRC_0;
741	    u_temp_i--;
742	 }
743	 goto next;
744
745      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
746      case OPCODE_SWZ:
747	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
748		t_dst_mask(dst.WriteMask));
749	 o_inst->src0 = t_src(vp, &src[0]);
750	 o_inst->src1 = ZERO_SRC_0;
751	 o_inst->src2 = UNUSED_SRC_1;
752	 goto next;
753
754      case OPCODE_MAD:
755	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
756	    instead (requiring 2 clocks) if all inputs are in temp memory
757	    (and, only if they actually reference 3 distinct temps) */
758	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
759	    src[1].File == PROGRAM_TEMPORARY &&
760	    src[2].File == PROGRAM_TEMPORARY &&
761	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
762	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
763	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
764	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
765
766	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
767	    t_dst_mask(dst.WriteMask));
768	 o_inst->src0 = t_src(vp, &src[0]);
769#if 0
770if ((o_inst - vp->instr) == 31) {
771/* fix up the broken vertex program of quake4 demo... */
772o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
773			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
774			t_src_class(src[1].File),
775			src[1].Negate) | (src[1].RelAddr << 4);
776o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
777			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
778			t_src_class(src[1].File),
779			src[1].Negate) | (src[1].RelAddr << 4);
780}
781else {
782	 o_inst->src1 = t_src(vp, &src[1]);
783	 o_inst->src2 = t_src(vp, &src[2]);
784}
785#else
786	 o_inst->src1 = t_src(vp, &src[1]);
787	 o_inst->src2 = t_src(vp, &src[2]);
788#endif
789	 goto next;
790
791      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
792	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
793		t_dst_mask(dst.WriteMask));
794
795	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
796		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
797		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
798		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
799		SWIZZLE_ZERO,
800		t_src_class(src[0].File),
801		src[0].Negate) | (src[0].RelAddr << 4);
802
803	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
804		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
805		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
806		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
807		SWIZZLE_ZERO,
808		t_src_class(src[1].File),
809		src[1].Negate) | (src[1].RelAddr << 4);
810
811	 o_inst->src2 = UNUSED_SRC_1;
812	 goto next;
813
814      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
815	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
816		t_dst_mask(dst.WriteMask));
817
818	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
819		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
820		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
821		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
822		VSF_IN_COMPONENT_ONE,
823		t_src_class(src[0].File),
824		src[0].Negate) | (src[0].RelAddr << 4);
825	 o_inst->src1 = t_src(vp, &src[1]);
826	 o_inst->src2 = UNUSED_SRC_1;
827	 goto next;
828
829      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
830	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
831		t_dst_mask(dst.WriteMask));
832
833	 o_inst->src0 = t_src(vp, &src[0]);
834	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
835		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
836		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
837		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
838		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
839		t_src_class(src[1].File),
840		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
841	 o_inst->src2 = UNUSED_SRC_1;
842	 goto next;
843
844      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
845	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
846		t_dst_mask(dst.WriteMask));
847
848	 o_inst->src0=t_src(vp, &src[0]);
849	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
850		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
851		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
852		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
853		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
854		t_src_class(src[0].File),
855		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
856	 o_inst->src2 = UNUSED_SRC_1;
857	 goto next;
858
859      case OPCODE_FLR:
860      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
861         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
862
863	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
864	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
865	    t_dst_mask(dst.WriteMask));
866
867	 o_inst->src0 = t_src(vp, &src[0]);
868	 o_inst->src1 = UNUSED_SRC_0;
869	 o_inst->src2 = UNUSED_SRC_1;
870	 o_inst++;
871
872	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
873		t_dst_mask(dst.WriteMask));
874
875	 o_inst->src0 = t_src(vp, &src[0]);
876	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
877		VSF_IN_COMPONENT_X,
878		VSF_IN_COMPONENT_Y,
879		VSF_IN_COMPONENT_Z,
880		VSF_IN_COMPONENT_W,
881		VSF_IN_CLASS_TMP,
882		/* Not 100% sure about this */
883		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
884
885	 o_inst->src2 = UNUSED_SRC_0;
886	 u_temp_i--;
887	 goto next;
888
889      case OPCODE_XPD:
890	 /* mul r0, r1.yzxw, r2.zxyw
891	    mad r0, -r2.yzxw, r1.zxyw, r0
892	  */
893	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
894	    src[1].File == PROGRAM_TEMPORARY &&
895	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
896	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
897
898	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
899	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
900	    t_dst_mask(dst.WriteMask));
901
902	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
903		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
904		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
905		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
906		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
907		t_src_class(src[0].File),
908		src[0].Negate) | (src[0].RelAddr << 4);
909
910	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
911		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
912		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
913		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
914		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
915		t_src_class(src[1].File),
916		src[1].Negate) | (src[1].RelAddr << 4);
917
918	 o_inst->src2 = UNUSED_SRC_1;
919	 o_inst++;
920	 u_temp_i--;
921
922	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
923		t_dst_mask(dst.WriteMask));
924
925	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
926		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
927		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
928		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
929		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
930		t_src_class(src[1].File),
931		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
932
933	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
934		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
935		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
936		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
937		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
938		t_src_class(src[0].File),
939		src[0].Negate) | (src[0].RelAddr << 4);
940
941	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
942		VSF_IN_COMPONENT_X,
943		VSF_IN_COMPONENT_Y,
944		VSF_IN_COMPONENT_Z,
945		VSF_IN_COMPONENT_W,
946		VSF_IN_CLASS_TMP,
947		VSF_FLAG_NONE);
948	 goto next;
949
950      case OPCODE_END:
951	 assert(0);
952      default:
953	 break;
954      }
955
956      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
957	    t_dst_mask(dst.WriteMask));
958
959      if(are_srcs_scalar){
960	 switch(operands){
961	    case 1:
962		o_inst->src0 = t_src_scalar(vp, &src[0]);
963		o_inst->src1 = UNUSED_SRC_0;
964		o_inst->src2 = UNUSED_SRC_1;
965	    break;
966
967	    case 2:
968		o_inst->src0 = t_src_scalar(vp, &src[0]);
969		o_inst->src1 = t_src_scalar(vp, &src[1]);
970		o_inst->src2 = UNUSED_SRC_1;
971	    break;
972
973	    case 3:
974		o_inst->src0 = t_src_scalar(vp, &src[0]);
975		o_inst->src1 = t_src_scalar(vp, &src[1]);
976		o_inst->src2 = t_src_scalar(vp, &src[2]);
977	    break;
978
979	    default:
980		fprintf(stderr, "illegal number of operands %lu\n", operands);
981		exit(-1);
982	    break;
983	 }
984      } else {
985	 switch(operands){
986	    case 1:
987		o_inst->src0 = t_src(vp, &src[0]);
988		o_inst->src1 = UNUSED_SRC_0;
989		o_inst->src2 = UNUSED_SRC_1;
990	    break;
991
992	    case 2:
993		o_inst->src0 = t_src(vp, &src[0]);
994		o_inst->src1 = t_src(vp, &src[1]);
995		o_inst->src2 = UNUSED_SRC_1;
996	    break;
997
998	    case 3:
999		o_inst->src0 = t_src(vp, &src[0]);
1000		o_inst->src1 = t_src(vp, &src[1]);
1001		o_inst->src2 = t_src(vp, &src[2]);
1002	    break;
1003
1004	    default:
1005		fprintf(stderr, "illegal number of operands %lu\n", operands);
1006		exit(-1);
1007	    break;
1008	 }
1009      }
1010      next:
1011
1012      if (dofogfix) {
1013	 o_inst++;
1014	 if (vp->fogmode == GL_EXP) {
1015	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1016		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1017		VSF_FLAG_X);
1018	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1019	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1020	    o_inst->src2 = UNUSED_SRC_1;
1021	    o_inst++;
1022	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1023		R200_VSF_OUT_CLASS_RESULT_FOGC,
1024		VSF_FLAG_X);
1025	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1026	    o_inst->src1 = UNUSED_SRC_0;
1027	    o_inst->src2 = UNUSED_SRC_1;
1028	 }
1029	 else if (vp->fogmode == GL_EXP2) {
1030	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1031		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1032		VSF_FLAG_X);
1033	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1034	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1035	    o_inst->src2 = UNUSED_SRC_1;
1036	    o_inst++;
1037	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1038		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1039		VSF_FLAG_X);
1040	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1041	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1042	    o_inst->src2 = UNUSED_SRC_1;
1043	    o_inst++;
1044	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1045		R200_VSF_OUT_CLASS_RESULT_FOGC,
1046		VSF_FLAG_X);
1047	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1048	    o_inst->src1 = UNUSED_SRC_0;
1049	    o_inst->src2 = UNUSED_SRC_1;
1050	 }
1051	 else { /* fogmode == GL_LINEAR */
1052		/* could do that with single op (dot) if using params like
1053		   with fixed function pipeline fog */
1054	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1055		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1056		VSF_FLAG_X);
1057	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1058	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1059	    o_inst->src2 = UNUSED_SRC_1;
1060	    o_inst++;
1061	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1062		R200_VSF_OUT_CLASS_RESULT_FOGC,
1063		VSF_FLAG_X);
1064	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1065	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1066	    o_inst->src2 = UNUSED_SRC_1;
1067
1068	 }
1069         dofogfix = 0;
1070      }
1071
1072      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1073      if (mesa_vp->arb.NumNativeTemporaries <
1074          (mesa_vp->arb.NumTemporaries + u_temp_used)) {
1075         mesa_vp->arb.NumNativeTemporaries =
1076            mesa_vp->arb.NumTemporaries + u_temp_used;
1077      }
1078      if ((mesa_vp->arb.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1079	 if (R200_DEBUG & RADEON_FALLBACKS) {
1080            fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->arb.NumTemporaries, u_temp_used);
1081	 }
1082	 return GL_FALSE;
1083      }
1084      u_temp_i = R200_VSF_MAX_TEMPS - 1;
1085      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1086         mesa_vp->arb.NumNativeInstructions = 129;
1087	 if (R200_DEBUG & RADEON_FALLBACKS) {
1088	    fprintf(stderr, "more than 128 native instructions\n");
1089	 }
1090	 return GL_FALSE;
1091      }
1092      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1093	 vp->pos_end = (o_inst - vp->instr);
1094      }
1095   }
1096
1097   vp->native = GL_TRUE;
1098   mesa_vp->arb.NumNativeInstructions = (o_inst - vp->instr);
1099#if 0
1100   fprintf(stderr, "hw program:\n");
1101   for(i=0; i < vp->program.length; i++)
1102      fprintf(stderr, "%08x\n", vp->instr[i]);
1103#endif
1104   return GL_TRUE;
1105}
1106
1107void r200SetupVertexProg( struct gl_context *ctx ) {
1108   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1109   struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1110   GLboolean fallback;
1111   GLint i;
1112
1113   if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1114      rmesa->curr_vp_hw = NULL;
1115      r200_translate_vertex_program(ctx, vp);
1116   }
1117   /* could optimize setting up vertex progs away for non-tcl hw */
1118   fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1119   TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1120   if (rmesa->radeon.TclFallback) return;
1121
1122   R200_STATECHANGE( rmesa, vap );
1123   /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1124             maybe only when using more than 64 inst / 96 param? */
1125   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1126
1127   R200_STATECHANGE( rmesa, pvs );
1128
1129   rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1130      ((vp->mesa_program.arb.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1131      (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1132   rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1133      (vp->mesa_program.arb.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1134
1135   /* maybe user clip planes just work with vertex progs... untested */
1136   if (ctx->Transform.ClipPlanesEnabled) {
1137      R200_STATECHANGE( rmesa, tcl );
1138      if (vp->mesa_program.arb.IsPositionInvariant) {
1139	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1140      }
1141      else {
1142	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1143      }
1144   }
1145
1146   if (vp != rmesa->curr_vp_hw) {
1147      GLuint count = vp->mesa_program.arb.NumNativeInstructions;
1148      drm_radeon_cmd_header_t tmp;
1149
1150      R200_STATECHANGE( rmesa, vpi[0] );
1151      R200_STATECHANGE( rmesa, vpi[1] );
1152
1153      /* FIXME: what about using a memcopy... */
1154      for (i = 0; (i < 64) && i < count; i++) {
1155	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1156	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1157	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1158	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1159      }
1160      /* hack up the cmd_size so not the whole state atom is emitted always.
1161         This may require some more thought, we may emit half progs on lost state, but
1162         hopefully it won't matter?
1163         WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1164         packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1165      rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1166      tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1167      tmp.veclinear.count = (count > 64) ? 64 : count;
1168      rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1169      if (count > 64) {
1170	 for (i = 0; i < (count - 64); i++) {
1171	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1172	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1173	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1174	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1175	 }
1176	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1177	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1178	 tmp.veclinear.count = count - 64;
1179	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1180      }
1181      rmesa->curr_vp_hw = vp;
1182   }
1183}
1184
1185
1186static void
1187r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1188{
1189   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1190
1191   switch(target){
1192   case GL_VERTEX_PROGRAM_ARB:
1193      rmesa->curr_vp_hw = NULL;
1194      break;
1195   default:
1196      _mesa_problem(ctx, "Target not supported yet!");
1197      break;
1198   }
1199}
1200
1201static struct gl_program *
1202r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id,
1203               bool is_arb_asm)
1204{
1205   switch(target){
1206   case GL_VERTEX_PROGRAM_ARB: {
1207      struct r200_vertex_program *vp = rzalloc(NULL,
1208                                               struct r200_vertex_program);
1209      return _mesa_init_gl_program(&vp->mesa_program, target, id, is_arb_asm);
1210   }
1211   case GL_FRAGMENT_PROGRAM_ARB: {
1212      struct gl_program *prog = rzalloc(NULL, struct gl_program);
1213      return _mesa_init_gl_program(prog, target, id, is_arb_asm);
1214   }
1215   default:
1216      _mesa_problem(ctx, "Bad target in r200NewProgram");
1217      return NULL;
1218   }
1219}
1220
1221
/**
 * Driver hook that destroys a program object.
 *
 * This function does no driver-specific cleanup of its own; it simply
 * forwards the program to _mesa_delete_program().
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1227
1228static GLboolean
1229r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1230{
1231   struct r200_vertex_program *vp = (void *)prog;
1232   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1233
1234   switch(target) {
1235   case GL_VERTEX_PROGRAM_ARB:
1236      vp->translated = GL_FALSE;
1237      vp->fogpidx = 0;
1238/*      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1239      r200_translate_vertex_program(ctx, vp);
1240      rmesa->curr_vp_hw = NULL;
1241      break;
1242   case GL_FRAGMENT_SHADER_ATI:
1243      rmesa->afs_loaded = NULL;
1244      break;
1245   }
1246   /* need this for tcl fallbacks */
1247   (void) _tnl_program_string(ctx, target, prog);
1248
1249   /* XXX check if program is legal, within limits */
1250   return GL_TRUE;
1251}
1252
1253static GLboolean
1254r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1255{
1256   struct r200_vertex_program *vp = (void *)prog;
1257
1258   switch(target){
1259   case GL_VERTEX_PROGRAM_ARB:
1260      if (!vp->translated) {
1261	 r200_translate_vertex_program(ctx, vp);
1262      }
1263     /* does not take parameters etc. into account */
1264      return vp->native;
1265   default:
1266      _mesa_problem(ctx, "Bad target in r200NewProgram");
1267   }
1268   return 0;
1269}
1270
1271void r200InitShaderFuncs(struct dd_function_table *functions)
1272{
1273   functions->NewProgram = r200NewProgram;
1274   functions->BindProgram = r200BindProgram;
1275   functions->DeleteProgram = r200DeleteProgram;
1276   functions->ProgramStringNotify = r200ProgramStringNotify;
1277   functions->IsProgramNative = r200IsProgramNative;
1278}
1279