r200_vertprog.c revision fc606f7db9072d4f40081aea8f92f1d4489a5115
1/**************************************************************************
2
3Copyright (C) 2005 Aapo Tahkola.
4
5All Rights Reserved.
6
7Permission is hereby granted, free of charge, to any person obtaining a
8copy of this software and associated documentation files (the "Software"),
9to deal in the Software without restriction, including without limitation
10on the rights to use, copy, modify, merge, publish, distribute, sub
11license, and/or sell copies of the Software, and to permit persons to whom
12the Software is furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice (including the next
15paragraph) shall be included in all copies or substantial portions of the
16Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26**************************************************************************/
27
28/*
29 * Authors:
30 *   Aapo Tahkola <aet@rasterburn.org>
31 *   Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33#include "glheader.h"
34#include "macros.h"
35#include "enums.h"
36#include "program.h"
37
38#include "r200_context.h"
39#include "r200_vertprog.h"
40#include "r200_ioctl.h"
41#include "r200_tcl.h"
42#include "program_instruction.h"
43#include "tnl/tnl.h"
44
45#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
46    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
47    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
48    SWIZZLE_W != VSF_IN_COMPONENT_W || \
49    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
50    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
51    WRITEMASK_X != VSF_FLAG_X || \
52    WRITEMASK_Y != VSF_FLAG_Y || \
53    WRITEMASK_Z != VSF_FLAG_Z || \
54    WRITEMASK_W != VSF_FLAG_W
55#error Cannot change these!
56#endif
57
58#define SCALAR_FLAG (1<<31)
59#define FLAG_MASK (1<<31)
60#define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
61#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
62
63static struct{
64   char *name;
65   int opcode;
66   unsigned long ip; /* number of input operands and flags */
67}op_names[]={
68   OPN(ABS, 1),
69   OPN(ADD, 2),
70   OPN(ARL, 1|SCALAR_FLAG),
71   OPN(DP3, 2),
72   OPN(DP4, 2),
73   OPN(DPH, 2),
74   OPN(DST, 2),
75   OPN(EX2, 1|SCALAR_FLAG),
76   OPN(EXP, 1|SCALAR_FLAG),
77   OPN(FLR, 1),
78   OPN(FRC, 1),
79   OPN(LG2, 1|SCALAR_FLAG),
80   OPN(LIT, 1),
81   OPN(LOG, 1|SCALAR_FLAG),
82   OPN(MAD, 3),
83   OPN(MAX, 2),
84   OPN(MIN, 2),
85   OPN(MOV, 1),
86   OPN(MUL, 2),
87   OPN(POW, 2|SCALAR_FLAG),
88   OPN(RCP, 1|SCALAR_FLAG),
89   OPN(RSQ, 1|SCALAR_FLAG),
90   OPN(SGE, 2),
91   OPN(SLT, 2),
92   OPN(SUB, 2),
93   OPN(SWZ, 1),
94   OPN(XPD, 2),
95   OPN(PRINT, 0),
96   OPN(END, 0),
97};
98#undef OPN
99
100static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
101{
102   r200ContextPtr rmesa = R200_CONTEXT( ctx );
103   GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
104   int pi;
105   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
106   struct gl_program_parameter_list *paramList;
107   drm_radeon_cmd_header_t tmp;
108
109   R200_STATECHANGE( rmesa, vpp[0] );
110   R200_STATECHANGE( rmesa, vpp[1] );
111   assert(mesa_vp->Base.Parameters);
112   _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
113   paramList = mesa_vp->Base.Parameters;
114
115   if(paramList->NumParameters > R200_VSF_MAX_PARAM){
116      fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
117      return GL_FALSE;
118   }
119
120   for(pi = 0; pi < paramList->NumParameters; pi++) {
121      switch(paramList->Parameters[pi].Type) {
122      case PROGRAM_STATE_VAR:
123      case PROGRAM_NAMED_PARAM:
124      //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
125      case PROGRAM_CONSTANT:
126	 *fcmd++ = paramList->ParameterValues[pi][0];
127	 *fcmd++ = paramList->ParameterValues[pi][1];
128	 *fcmd++ = paramList->ParameterValues[pi][2];
129	 *fcmd++ = paramList->ParameterValues[pi][3];
130	 break;
131      default:
132	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
133	 break;
134      }
135      if (pi == 95) {
136	 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
137      }
138   }
139   /* hack up the cmd_size so not the whole state atom is emitted always. */
140   rmesa->hw.vpp[0].cmd_size =
141      1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
142   tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
143   tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
144   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
145   if (paramList->NumParameters > 96) {
146      rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
147      tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
148      tmp.veclinear.count = paramList->NumParameters - 96;
149      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
150   }
151   return GL_TRUE;
152}
153
154static __inline unsigned long t_dst_mask(GLuint mask)
155{
156   /* WRITEMASK_* is equivalent to VSF_FLAG_* */
157   return mask & VSF_FLAG_ALL;
158}
159
160static unsigned long t_dst(struct prog_dst_register *dst)
161{
162   switch(dst->File) {
163   case PROGRAM_TEMPORARY:
164      return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
165	 | R200_VSF_OUT_CLASS_TMP);
166   case PROGRAM_OUTPUT:
167      switch (dst->Index) {
168      case VERT_RESULT_HPOS:
169	 return R200_VSF_OUT_CLASS_RESULT_POS;
170      case VERT_RESULT_COL0:
171	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
172      case VERT_RESULT_COL1:
173	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
174	    | R200_VSF_OUT_CLASS_RESULT_COLOR);
175      case VERT_RESULT_FOGC:
176	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
177      case VERT_RESULT_TEX0:
178      case VERT_RESULT_TEX1:
179      case VERT_RESULT_TEX2:
180      case VERT_RESULT_TEX3:
181      case VERT_RESULT_TEX4:
182      case VERT_RESULT_TEX5:
183	 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
184	    | R200_VSF_OUT_CLASS_RESULT_TEXC);
185      case VERT_RESULT_PSIZ:
186	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
187      default:
188	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
189	 exit(0);
190	 return 0;
191      }
192   case PROGRAM_ADDRESS:
193      assert (dst->Index == 0);
194      return R200_VSF_OUT_CLASS_ADDR;
195   default:
196      fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
197      exit(0);
198      return 0;
199   }
200}
201
202static unsigned long t_src_class(enum register_file file)
203{
204
205   switch(file){
206   case PROGRAM_TEMPORARY:
207      return VSF_IN_CLASS_TMP;
208
209   case PROGRAM_INPUT:
210      return VSF_IN_CLASS_ATTR;
211
212   case PROGRAM_LOCAL_PARAM:
213   case PROGRAM_ENV_PARAM:
214   case PROGRAM_NAMED_PARAM:
215   case PROGRAM_STATE_VAR:
216      return VSF_IN_CLASS_PARAM;
217   /*
218   case PROGRAM_OUTPUT:
219   case PROGRAM_WRITE_ONLY:
220   case PROGRAM_ADDRESS:
221   */
222   default:
223      fprintf(stderr, "problem in %s", __FUNCTION__);
224      exit(0);
225   }
226}
227
228static __inline unsigned long t_swizzle(GLubyte swizzle)
229{
230/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
231   return swizzle;
232}
233
#if 0
/* Debug helper (compiled out): print the whole vp->inputs[] table to
   stderr, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
251
252static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
253{
254/*
255   int i;
256   int max_reg = -1;
257*/
258   if(src->File == PROGRAM_INPUT){
259/*      if(vp->inputs[src->Index] != -1)
260	 return vp->inputs[src->Index];
261
262      for(i=0; i < VERT_ATTRIB_MAX; i++)
263	 if(vp->inputs[i] > max_reg)
264	    max_reg = vp->inputs[i];
265
266      vp->inputs[src->Index] = max_reg+1;*/
267
268      //vp_dump_inputs(vp, __FUNCTION__);
269      assert(vp->inputs[src->Index] != -1);
270      return vp->inputs[src->Index];
271   } else {
272      if (src->Index < 0) {
273	 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
274	 return 0;
275      }
276      return src->Index;
277   }
278}
279
280static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
281{
282
283   return MAKE_VSF_SOURCE(t_src_index(vp, src),
284			t_swizzle(GET_SWZ(src->Swizzle, 0)),
285			t_swizzle(GET_SWZ(src->Swizzle, 1)),
286			t_swizzle(GET_SWZ(src->Swizzle, 2)),
287			t_swizzle(GET_SWZ(src->Swizzle, 3)),
288			t_src_class(src->File),
289			src->NegateBase) | (src->RelAddr << 4);
290}
291
292static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
293{
294
295   return MAKE_VSF_SOURCE(t_src_index(vp, src),
296			t_swizzle(GET_SWZ(src->Swizzle, 0)),
297			t_swizzle(GET_SWZ(src->Swizzle, 0)),
298			t_swizzle(GET_SWZ(src->Swizzle, 0)),
299			t_swizzle(GET_SWZ(src->Swizzle, 0)),
300			t_src_class(src->File),
301			src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
302}
303
304static unsigned long t_opcode(enum prog_opcode opcode)
305{
306
307   switch(opcode){
308   case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
309   /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
310    * seems to ignore neg offsets which isn't quite correct...
311    */
312   case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
313   case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
314   case OPCODE_DST: return R200_VPI_OUT_OP_DST;
315   case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
316   case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
317   case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
318   case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
319   case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
320   case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
321   case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
322   case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
323   case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
324   case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
325   case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
326   case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
327   case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
328
329   default:
330      fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
331   }
332   exit(-1);
333   return 0;
334}
335
336static unsigned long op_operands(enum prog_opcode opcode)
337{
338   int i;
339
340   /* Can we trust mesas opcodes to be in order ? */
341   for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
342      if(op_names[i].opcode == opcode)
343	 return op_names[i].ip;
344
345   fprintf(stderr, "op %d not found in op_names\n", opcode);
346   exit(-1);
347   return 0;
348}
349
/* TODO: Get rid of t_src_class call */
/* True if the two sources differ in index or relative addressing while
   both are of class PARAM, or both are of class ATTR. The translator
   copies one of the two sources to a temporary first when this holds.
   Arguments are parenthesized so that any lvalue expression can be
   passed; each argument is evaluated more than once. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* ZERO_SRC_n: copy of o_inst->srcN with all four component selectors
   replaced by R200_VPI_IN_SELECT_ZERO. All macros below expect a pointer
   named o_inst to be in scope at the point of use. */
#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* UNUSED_SRC_n: copy of o_inst->srcN with its low four bits replaced by
   the value 9 (presumably the VSF_IN_CLASS_NONE encoding mentioned above -
   TODO confirm against the register headers). */
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)

#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)

#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)


/* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
#define PREFER_DP4
392
393
394/**
395 * Generate an R200 vertex program from Mesa's internal representation.
396 *
397 * \return  GL_TRUE for success, GL_FALSE for failure.
398 */
399static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp, GLenum fogmode)
400{
401   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
402   struct prog_instruction *vpi;
403   int i;
404   VERTEX_SHADER_INSTRUCTION *o_inst;
405   unsigned long operands;
406   int are_srcs_scalar;
407   unsigned long hw_op;
408   int dofogfix = 0;
409   int fog_temp_i = 0;
410
411   vp->native = GL_FALSE;
412   vp->translated = GL_TRUE;
413   vp->fogmode = fogmode;
414
415   if (mesa_vp->Base.NumInstructions == 0)
416      return GL_FALSE;
417
418   if ((mesa_vp->Base.InputsRead &
419      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
420      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
421      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
422      if (R200_DEBUG & DEBUG_FALLBACKS) {
423	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
424	    mesa_vp->Base.InputsRead);
425      }
426      return GL_FALSE;
427   }
428
429   if ((mesa_vp->Base.OutputsWritten &
430      ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
431      (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
432      (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
433      (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
434      if (R200_DEBUG & DEBUG_FALLBACKS) {
435	 fprintf(stderr, "can't handle vert prog outputs 0x%x\n",
436	    mesa_vp->Base.OutputsWritten);
437      }
438      return GL_FALSE;
439   }
440
441   if (mesa_vp->IsNVProgram) {
442   /* subtle differences in spec like guaranteed initialized regs could cause
443      headaches. Might want to remove the driconf option to enable it completely */
444      return GL_FALSE;
445   }
446   /* Initial value should be last tmp reg that hw supports.
447      Strangely enough r300 doesnt mind even though these would be out of range.
448      Smart enough to realize that it doesnt need it? */
449   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
450   struct prog_src_register src[3];
451   struct prog_dst_register dst;
452
453/* FIXME: is changing the prog safe to do here? */
454   if (mesa_vp->IsPositionInvariant &&
455      /* make sure we only do this once */
456       !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
457      struct gl_program_parameter_list *paramList;
458      GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
459
460#ifdef PREFER_DP4
461      tokens[5] = STATE_MATRIX;
462#else
463      tokens[5] = STATE_MATRIX_TRANSPOSE;
464#endif
465      paramList = mesa_vp->Base.Parameters;
466
467      vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
468      memset(vpi, 0, 4 * sizeof(struct prog_instruction));
469
470      /* emit four dot product instructions to do MVP transformation */
471      for (i=0; i < 4; i++) {
472	 GLint idx;
473	 tokens[3] = tokens[4] = i;
474	 idx = _mesa_add_state_reference(paramList, tokens);
475#ifdef PREFER_DP4
476	 vpi[i].Opcode = OPCODE_DP4;
477	 vpi[i].StringPos = 0;
478	 vpi[i].Data = 0;
479
480	 vpi[i].DstReg.File = PROGRAM_OUTPUT;
481	 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
482	 vpi[i].DstReg.WriteMask = 1 << i;
483	 vpi[i].DstReg.CondMask = COND_TR;
484
485	 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
486	 vpi[i].SrcReg[0].Index = idx;
487	 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
488
489	 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
490	 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
491	 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
492#else
493	 if (i == 0)
494	    vpi[i].Opcode = OPCODE_MUL;
495	 else
496	    vpi[i].Opcode = OPCODE_MAD;
497
498	 vpi[i].StringPos = 0;
499	 vpi[i].Data = 0;
500
501	 if (i == 3)
502	    vpi[i].DstReg.File = PROGRAM_OUTPUT;
503	 else
504	    vpi[i].DstReg.File = PROGRAM_TEMPORARY;
505	 vpi[i].DstReg.Index = 0;
506	 vpi[i].DstReg.WriteMask = 0xf;
507	 vpi[i].DstReg.CondMask = COND_TR;
508
509	 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
510	 vpi[i].SrcReg[0].Index = idx;
511	 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
512
513	 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
514	 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
515	 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
516
517	 if (i > 0) {
518	    vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
519	    vpi[i].SrcReg[2].Index = 0;
520	    vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
521	 }
522#endif
523      }
524
525      /* now append original program after our new instructions */
526      memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
527
528      /* deallocate original program */
529      free(mesa_vp->Base.Instructions);
530
531      /* install new program */
532      mesa_vp->Base.Instructions = vpi;
533
534      mesa_vp->Base.NumInstructions += 4;
535      vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
536
537      assert(vpi->Opcode == OPCODE_END);
538
539      mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
540      mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
541
542      //fprintf(stderr, "IsPositionInvariant is set!\n");
543      //_mesa_print_program(&mesa_vp->Base);
544   }
545
546   /* for fogc, can't change mesa_vp, as it would hose swtnl
547      maybe should just copy whole prog ? */
548   if (mesa_vp->Base.OutputsWritten & VERT_RESULT_FOGC && !vp->fogpidx) {
549      struct gl_program_parameter_list *paramList;
550      GLint tokens[6] = { STATE_FOG_PARAMS, 0, 0, 0, 0, 0 };
551      paramList = mesa_vp->Base.Parameters;
552      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
553   }
554
555   vp->pos_end = 0;
556   mesa_vp->Base.NumNativeInstructions = 0;
557   if (mesa_vp->Base.Parameters)
558      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
559   else
560      mesa_vp->Base.NumNativeParameters = 0;
561
562   for(i=0; i < VERT_ATTRIB_MAX; i++)
563      vp->inputs[i] = -1;
564/* fglrx uses fixed inputs as follows for conventional attribs.
565   generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
566   There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
567   attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
568   vertex normal/weight)
569   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
570   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
571   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
572   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
573   generic attribs would require some more work (dma regions, renaming). */
574
575/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
576   vp->inputs[VERT_ATTRIB_POS] = 0;
577   vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
578   vp->inputs[VERT_ATTRIB_NORMAL] = 1;
579   vp->inputs[VERT_ATTRIB_COLOR0] = 2;
580   vp->inputs[VERT_ATTRIB_COLOR1] = 3;
581   vp->inputs[VERT_ATTRIB_FOG] = 15;
582   vp->inputs[VERT_ATTRIB_TEX0] = 6;
583   vp->inputs[VERT_ATTRIB_TEX1] = 7;
584   vp->inputs[VERT_ATTRIB_TEX2] = 8;
585   vp->inputs[VERT_ATTRIB_TEX3] = 9;
586   vp->inputs[VERT_ATTRIB_TEX4] = 10;
587   vp->inputs[VERT_ATTRIB_TEX5] = 11;
588/* attr 4,5 and 13 are only used with generic attribs.
589   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
590   not possibe to use with vertex progs as it is lacking in vert prog specification) */
591
592   if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
593      if (R200_DEBUG & DEBUG_FALLBACKS) {
594	 fprintf(stderr, "can't handle vert prog without position output\n");
595      }
596      return GL_FALSE;
597   }
598
599   o_inst = vp->instr;
600   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
601      operands = op_operands(vpi->Opcode);
602      are_srcs_scalar = operands & SCALAR_FLAG;
603      operands &= OP_MASK;
604
605      for(i = 0; i < operands; i++) {
606	 src[i] = vpi->SrcReg[i];
607	 /* hack up default attrib values as per spec as swizzling.
608	    normal, fog, secondary color. Crazy?
609	    May need more if we don't submit vec4 elements? */
610	 if (src[i].File == PROGRAM_INPUT) {
611	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
612	       int j;
613	       for (j = 0; j < 4; j++) {
614		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
615		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
616		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
617		  }
618	       }
619	    }
620	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
621	       int j;
622	       for (j = 0; j < 4; j++) {
623		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
624		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
625		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
626		  }
627	       }
628	    }
629	    else if (src[i].Index == VERT_ATTRIB_FOG) {
630	       int j;
631	       for (j = 0; j < 4; j++) {
632		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
633		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
634		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
635		  }
636		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
637			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
638		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
639		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
640		  }
641	       }
642	    }
643	 }
644      }
645
646      if(operands == 3){
647	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
648	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
649		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
650		VSF_FLAG_ALL);
651
652	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
653		  SWIZZLE_X, SWIZZLE_Y,
654		  SWIZZLE_Z, SWIZZLE_W,
655		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
656
657	    o_inst->src1 = ZERO_SRC_0;
658	    o_inst->src2 = UNUSED_SRC_1;
659	    o_inst++;
660
661	    src[2].File = PROGRAM_TEMPORARY;
662	    src[2].Index = u_temp_i;
663	    src[2].RelAddr = 0;
664	    u_temp_i--;
665	 }
666      }
667
668      if(operands >= 2){
669	 if( CMP_SRCS(src[1], src[0]) ){
670	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
671		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
672		VSF_FLAG_ALL);
673
674	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
675		  SWIZZLE_X, SWIZZLE_Y,
676		  SWIZZLE_Z, SWIZZLE_W,
677		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
678
679	    o_inst->src1 = ZERO_SRC_0;
680	    o_inst->src2 = UNUSED_SRC_1;
681	    o_inst++;
682
683	    src[0].File = PROGRAM_TEMPORARY;
684	    src[0].Index = u_temp_i;
685	    src[0].RelAddr = 0;
686	    u_temp_i--;
687	 }
688      }
689
690      dst = vpi->DstReg;
691      if (dst.File == PROGRAM_OUTPUT &&
692	  dst.Index == VERT_RESULT_FOGC &&
693	  dst.WriteMask & WRITEMASK_X) {
694	  fog_temp_i = u_temp_i;
695	  dst.File = PROGRAM_TEMPORARY;
696	  dst.Index = fog_temp_i;
697	  dofogfix = 1;
698	  u_temp_i--;
699      }
700
701      /* These ops need special handling. */
702      switch(vpi->Opcode){
703      case OPCODE_POW:
704/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
705   So may need to insert additional instruction */
706	 if ((src[0].File == src[1].File) &&
707	     (src[0].Index == src[1].Index)) {
708	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
709		   t_dst_mask(dst.WriteMask));
710	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
711		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
712		   SWIZZLE_ZERO,
713		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
714		   SWIZZLE_ZERO,
715		   t_src_class(src[0].File),
716		   src[0].NegateBase) | (src[0].RelAddr << 4);
717	    o_inst->src1 = UNUSED_SRC_0;
718	    o_inst->src2 = UNUSED_SRC_0;
719	 }
720	 else {
721	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
722		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
723		   VSF_FLAG_ALL);
724	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
725		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
726		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
727		   t_src_class(src[0].File),
728		   src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
729	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
730		   SWIZZLE_ZERO, SWIZZLE_ZERO,
731		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
732		   t_src_class(src[1].File),
733		   src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
734	    o_inst->src2 = UNUSED_SRC_1;
735	    o_inst++;
736
737	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
738		   t_dst_mask(dst.WriteMask));
739	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
740		   VSF_IN_COMPONENT_X,
741		   VSF_IN_COMPONENT_Y,
742		   VSF_IN_COMPONENT_Z,
743		   VSF_IN_COMPONENT_W,
744		   VSF_IN_CLASS_TMP,
745		   VSF_FLAG_NONE);
746	    o_inst->src1 = UNUSED_SRC_0;
747	    o_inst->src2 = UNUSED_SRC_0;
748	    u_temp_i--;
749	 }
750	 goto next;
751
752      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
753      case OPCODE_SWZ:
754	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
755		t_dst_mask(dst.WriteMask));
756	 o_inst->src0 = t_src(vp, &src[0]);
757	 o_inst->src1 = ZERO_SRC_0;
758	 o_inst->src2 = UNUSED_SRC_1;
759	 goto next;
760
761      case OPCODE_MAD:
762	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
763	    src[1].File == PROGRAM_TEMPORARY &&
764	    src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
765
766	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
767	    t_dst_mask(dst.WriteMask));
768	 o_inst->src0 = t_src(vp, &src[0]);
769#if 0
770if ((o_inst - vp->instr) == 31) {
771/* fix up the broken vertex program of quake4 demo... */
772o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
773			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
774			t_src_class(src[1].File),
775			src[1].NegateBase) | (src[1].RelAddr << 4);
776o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
777			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
778			t_src_class(src[1].File),
779			src[1].NegateBase) | (src[1].RelAddr << 4);
780}
781else {
782	 o_inst->src1 = t_src(vp, &src[1]);
783	 o_inst->src2 = t_src(vp, &src[2]);
784}
785#else
786	 o_inst->src1 = t_src(vp, &src[1]);
787	 o_inst->src2 = t_src(vp, &src[2]);
788#endif
789	 goto next;
790
791      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
792	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
793		t_dst_mask(dst.WriteMask));
794
795	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
796		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
797		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
798		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
799		SWIZZLE_ZERO,
800		t_src_class(src[0].File),
801		src[0].NegateBase) | (src[0].RelAddr << 4);
802
803	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
804		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
805		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
806		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
807		SWIZZLE_ZERO,
808		t_src_class(src[1].File),
809		src[1].NegateBase) | (src[1].RelAddr << 4);
810
811	 o_inst->src2 = UNUSED_SRC_1;
812	 goto next;
813
814      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
815	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
816		t_dst_mask(dst.WriteMask));
817
818	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
819		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
820		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
821		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
822		VSF_IN_COMPONENT_ONE,
823		t_src_class(src[0].File),
824		src[0].NegateBase) | (src[0].RelAddr << 4);
825	 o_inst->src1 = t_src(vp, &src[1]);
826	 o_inst->src2 = UNUSED_SRC_1;
827	 goto next;
828
829      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
830	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
831		t_dst_mask(dst.WriteMask));
832
833	 o_inst->src0 = t_src(vp, &src[0]);
834	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
835		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
836		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
837		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
838		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
839		t_src_class(src[1].File),
840		(!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
841	 o_inst->src2 = UNUSED_SRC_1;
842	 goto next;
843
844      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
845	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
846		t_dst_mask(dst.WriteMask));
847
848	 o_inst->src0=t_src(vp, &src[0]);
849	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
850		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
851		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
852		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
853		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
854		t_src_class(src[0].File),
855		(!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
856	 o_inst->src2 = UNUSED_SRC_1;
857	 goto next;
858
859      case OPCODE_FLR:
860      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
861         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
862
863	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
864	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
865	    t_dst_mask(dst.WriteMask));
866
867	 o_inst->src0 = t_src(vp, &src[0]);
868	 o_inst->src1 = UNUSED_SRC_0;
869	 o_inst->src2 = UNUSED_SRC_1;
870	 o_inst++;
871
872	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
873		t_dst_mask(dst.WriteMask));
874
875	 o_inst->src0 = t_src(vp, &src[0]);
876	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
877		VSF_IN_COMPONENT_X,
878		VSF_IN_COMPONENT_Y,
879		VSF_IN_COMPONENT_Z,
880		VSF_IN_COMPONENT_W,
881		VSF_IN_CLASS_TMP,
882		/* Not 100% sure about this */
883		(!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
884
885	 o_inst->src2 = UNUSED_SRC_0;
886	 u_temp_i--;
887	 goto next;
888
889      case OPCODE_XPD:
890	 /* mul r0, r1.yzxw, r2.zxyw
891	    mad r0, -r2.yzxw, r1.zxyw, r0
892	    NOTE: might need MAD_2
893	  */
894
895	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
896	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
897	    t_dst_mask(dst.WriteMask));
898
899	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
900		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
901		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
902		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
903		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
904		t_src_class(src[0].File),
905		src[0].NegateBase) | (src[0].RelAddr << 4);
906
907	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
908		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
909		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
910		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
911		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
912		t_src_class(src[1].File),
913		src[1].NegateBase) | (src[1].RelAddr << 4);
914
915	 o_inst->src2 = UNUSED_SRC_1;
916	 o_inst++;
917	 u_temp_i--;
918
919	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
920		t_dst_mask(dst.WriteMask));
921
922	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
923		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
924		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
925		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
926		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
927		t_src_class(src[1].File),
928		(!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
929
930	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
931		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
932		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
933		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
934		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
935		t_src_class(src[0].File),
936		src[0].NegateBase) | (src[0].RelAddr << 4);
937
938	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
939		VSF_IN_COMPONENT_X,
940		VSF_IN_COMPONENT_Y,
941		VSF_IN_COMPONENT_Z,
942		VSF_IN_COMPONENT_W,
943		VSF_IN_CLASS_TMP,
944		VSF_FLAG_NONE);
945	 goto next;
946
947      case OPCODE_END:
948	 assert(0);
949      default:
950	 break;
951      }
952
953      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
954	    t_dst_mask(dst.WriteMask));
955
956      if(are_srcs_scalar){
957	 switch(operands){
958	    case 1:
959		o_inst->src0 = t_src_scalar(vp, &src[0]);
960		o_inst->src1 = UNUSED_SRC_0;
961		o_inst->src2 = UNUSED_SRC_1;
962	    break;
963
964	    case 2:
965		o_inst->src0 = t_src_scalar(vp, &src[0]);
966		o_inst->src1 = t_src_scalar(vp, &src[1]);
967		o_inst->src2 = UNUSED_SRC_1;
968	    break;
969
970	    case 3:
971		o_inst->src0 = t_src_scalar(vp, &src[0]);
972		o_inst->src1 = t_src_scalar(vp, &src[1]);
973		o_inst->src2 = t_src_scalar(vp, &src[2]);
974	    break;
975
976	    default:
977		fprintf(stderr, "illegal number of operands %lu\n", operands);
978		exit(-1);
979	    break;
980	 }
981      } else {
982	 switch(operands){
983	    case 1:
984		o_inst->src0 = t_src(vp, &src[0]);
985		o_inst->src1 = UNUSED_SRC_0;
986		o_inst->src2 = UNUSED_SRC_1;
987	    break;
988
989	    case 2:
990		o_inst->src0 = t_src(vp, &src[0]);
991		o_inst->src1 = t_src(vp, &src[1]);
992		o_inst->src2 = UNUSED_SRC_1;
993	    break;
994
995	    case 3:
996		o_inst->src0 = t_src(vp, &src[0]);
997		o_inst->src1 = t_src(vp, &src[1]);
998		o_inst->src2 = t_src(vp, &src[2]);
999	    break;
1000
1001	    default:
1002		fprintf(stderr, "illegal number of operands %lu\n", operands);
1003		exit(-1);
1004	    break;
1005	 }
1006      }
1007      next:
1008
1009      if (dofogfix) {
1010	 o_inst++;
1011	 if (vp->fogmode == GL_EXP) {
1012	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1013		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1014		VSF_FLAG_X);
1015	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1016	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1017	    o_inst->src2 = UNUSED_SRC_1;
1018	    o_inst++;
1019	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_FOG,
1020		R200_VSF_OUT_CLASS_RESULT_FOGC,
1021		VSF_FLAG_X);
1022	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1023	    o_inst->src1 = UNUSED_SRC_0;
1024	    o_inst->src2 = UNUSED_SRC_1;
1025	 }
1026	 else if (vp->fogmode == GL_EXP2) {
1027	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1028		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1029		VSF_FLAG_X);
1030	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1031	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1032	    o_inst->src2 = UNUSED_SRC_1;
1033	    o_inst++;
1034	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1035		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1036		VSF_FLAG_X);
1037	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1038	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1039	    o_inst->src2 = UNUSED_SRC_1;
1040	    o_inst++;
1041	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_FOG,
1042		R200_VSF_OUT_CLASS_RESULT_FOGC,
1043		VSF_FLAG_X);
1044	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1045	    o_inst->src1 = UNUSED_SRC_0;
1046	    o_inst->src2 = UNUSED_SRC_1;
1047	 }
1048	 else { /* fogmode == GL_LINEAR */
1049		/* could do that with single op (dot) if using params like
1050		   with fixed function pipeline fog */
1051	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1052		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1053		VSF_FLAG_X);
1054	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1055	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1056	    o_inst->src2 = UNUSED_SRC_1;
1057	    o_inst++;
1058	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1059		R200_VSF_OUT_CLASS_RESULT_FOGC,
1060		VSF_FLAG_X);
1061	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1062	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1063	    o_inst->src2 = UNUSED_SRC_1;
1064
1065	 }
1066         dofogfix = 0;
1067      }
1068
1069      if (mesa_vp->Base.NumNativeTemporaries <
1070	 (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
1071	 mesa_vp->Base.NumNativeTemporaries =
1072	    mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
1073      }
1074      if (u_temp_i < mesa_vp->Base.NumTemporaries) {
1075	 if (R200_DEBUG & DEBUG_FALLBACKS) {
1076	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
1077	 }
1078	 return GL_FALSE;
1079      }
1080      u_temp_i = R200_VSF_MAX_TEMPS - 1;
1081      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1082	 mesa_vp->Base.NumNativeInstructions = 129;
1083	 if (R200_DEBUG & DEBUG_FALLBACKS) {
1084	    fprintf(stderr, "more than 128 native instructions\n");
1085	 }
1086	 return GL_FALSE;
1087      }
1088      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1089	 vp->pos_end = (o_inst - vp->instr);
1090      }
1091   }
1092
1093   vp->native = GL_TRUE;
1094   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1095#if 0
1096   fprintf(stderr, "hw program:\n");
1097   for(i=0; i < vp->program.length; i++)
1098      fprintf(stderr, "%08x\n", vp->instr[i]);
1099#endif
1100   return GL_TRUE;
1101}
1102
1103void r200SetupVertexProg( GLcontext *ctx ) {
1104   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1105   struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1106   GLboolean fallback;
1107   GLint i;
1108
1109   if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1110      rmesa->curr_vp_hw = NULL;
1111      r200_translate_vertex_program(vp, ctx->Fog.Mode);
1112   }
1113   /* could optimize setting up vertex progs away for non-tcl hw */
1114   fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1115      rmesa->r200Screen->drmSupportsVertexProgram);
1116   TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1117   if (rmesa->TclFallback) return;
1118
1119   R200_STATECHANGE( rmesa, vap );
1120   /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1121             maybe only when using more than 64 inst / 96 param? */
1122   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1123
1124   R200_STATECHANGE( rmesa, pvs );
1125
1126   rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1127      ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1128      (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1129   rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1130      (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1131
1132   /* maybe user clip planes just work with vertex progs... untested */
1133   if (ctx->Transform.ClipPlanesEnabled) {
1134      R200_STATECHANGE( rmesa, tcl );
1135      if (vp->mesa_program.IsPositionInvariant) {
1136	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1137      }
1138      else {
1139	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1140      }
1141   }
1142
1143   if (vp != rmesa->curr_vp_hw) {
1144      GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1145      drm_radeon_cmd_header_t tmp;
1146
1147      R200_STATECHANGE( rmesa, vpi[0] );
1148      R200_STATECHANGE( rmesa, vpi[1] );
1149
1150      /* FIXME: what about using a memcopy... */
1151      for (i = 0; (i < 64) && i < count; i++) {
1152	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1153	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1154	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1155	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1156      }
1157      /* hack up the cmd_size so not the whole state atom is emitted always.
1158         This may require some more thought, we may emit half progs on lost state, but
1159         hopefully it won't matter?
1160         WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1161         packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1162      rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1163      tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1164      tmp.veclinear.count = (count > 64) ? 64 : count;
1165      rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1166      if (count > 64) {
1167	 for (i = 0; i < (count - 64); i++) {
1168	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1169	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1170	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1171	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1172	 }
1173	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1174	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1175	 tmp.veclinear.count = count - 64;
1176	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1177      }
1178      rmesa->curr_vp_hw = vp;
1179   }
1180}
1181
1182
1183static void
1184r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1185{
1186   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1187
1188   switch(target){
1189   case GL_VERTEX_PROGRAM_ARB:
1190      rmesa->curr_vp_hw = NULL;
1191      break;
1192   default:
1193      _mesa_problem(ctx, "Target not supported yet!");
1194      break;
1195   }
1196}
1197
1198static struct gl_program *
1199r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1200{
1201   struct r200_vertex_program *vp;
1202
1203   switch(target){
1204   case GL_VERTEX_PROGRAM_ARB:
1205      vp = CALLOC_STRUCT(r200_vertex_program);
1206      return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1207   case GL_FRAGMENT_PROGRAM_ARB:
1208   case GL_FRAGMENT_PROGRAM_NV:
1209      return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1210   default:
1211      _mesa_problem(ctx, "Bad target in r200NewProgram");
1212   }
1213   return NULL;
1214}
1215
1216
1217static void
1218r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1219{
1220   _mesa_delete_program(ctx, prog);
1221}
1222
1223static void
1224r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1225{
1226   struct r200_vertex_program *vp = (void *)prog;
1227   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1228
1229   switch(target) {
1230   case GL_VERTEX_PROGRAM_ARB:
1231      vp->translated = GL_FALSE;
1232      vp->fogpidx = 0;
1233/*      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1234      r200_translate_vertex_program(vp, ctx->Fog.Mode);
1235      rmesa->curr_vp_hw = NULL;
1236      break;
1237   }
1238   /* need this for tcl fallbacks */
1239   _tnl_program_string(ctx, target, prog);
1240}
1241
1242static GLboolean
1243r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1244{
1245   struct r200_vertex_program *vp = (void *)prog;
1246
1247   switch(target){
1248   case GL_VERTEX_STATE_PROGRAM_NV:
1249   case GL_VERTEX_PROGRAM_ARB:
1250      if (!vp->translated) {
1251	 r200_translate_vertex_program(vp, ctx->Fog.Mode);
1252      }
1253     /* does not take parameters etc. into account */
1254      return vp->native;
1255   default:
1256      _mesa_problem(ctx, "Bad target in r200NewProgram");
1257   }
1258   return 0;
1259}
1260
1261void r200InitShaderFuncs(struct dd_function_table *functions)
1262{
1263   functions->NewProgram = r200NewProgram;
1264   functions->BindProgram = r200BindProgram;
1265   functions->DeleteProgram = r200DeleteProgram;
1266   functions->ProgramStringNotify = r200ProgramStringNotify;
1267   functions->IsProgramNative = r200IsProgramNative;
1268}
1269