st_mesa_to_tgsi.c revision d531f9c2f5c78468d913fc509b223760ac1c1124
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34#include "pipe/p_compiler.h"
35#include "pipe/p_context.h"
36#include "pipe/p_screen.h"
37#include "pipe/p_shader_tokens.h"
38#include "pipe/p_state.h"
39#include "tgsi/tgsi_ureg.h"
40#include "st_mesa_to_tgsi.h"
41#include "st_context.h"
42#include "program/prog_instruction.h"
43#include "program/prog_parameter.h"
44#include "util/u_debug.h"
45#include "util/u_math.h"
46#include "util/u_memory.h"
47
48
/**
 * Bitmask with bit (1 << file) set for each of the register files listed
 * below.  Presumably intended for testing whether a gl_register_file value
 * names a constant-like file; it is not referenced in the visible part of
 * this file.
 */
#define PROGRAM_ANY_CONST          \
   ((1 << PROGRAM_LOCAL_PARAM) |   \
    (1 << PROGRAM_ENV_PARAM)   |   \
    (1 << PROGRAM_STATE_VAR)   |   \
    (1 << PROGRAM_NAMED_PARAM) |   \
    (1 << PROGRAM_CONSTANT)    |   \
    (1 << PROGRAM_UNIFORM))
55
56
/**
 * One recorded branch: the target the branch refers to, plus a token slot
 * whose address get_label() hands to the instruction emitter.
 */
struct label {
   unsigned branch_target;   /* value passed to get_label() for this branch */
   unsigned token;           /* slot whose address get_label() returns */
};
61
62
63/**
64 * Intermediate state used during shader translation.
65 */
66struct st_translate {
67   struct ureg_program *ureg;
68
69   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
70   struct ureg_src *constants;
71   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
72   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
73   struct ureg_dst address[1];
74   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
75
76   /* Extra info for handling point size clamping in vertex shader */
77   struct ureg_dst pointSizeResult; /**< Actual point size output register */
78   struct ureg_src pointSizeConst;  /**< Point size range constant register */
79   GLint pointSizeOutIndex;         /**< Temp point size output register */
80   GLboolean prevInstWrotePointSize;
81
82   const GLuint *inputMapping;
83   const GLuint *outputMapping;
84
85   /* For every instruction that contains a label (eg CALL), keep
86    * details so that we can go back afterwards and emit the correct
87    * tgsi instruction number for each label.
88    */
89   struct label *labels;
90   unsigned labels_size;
91   unsigned labels_count;
92
93   /* Keep a record of the tgsi instruction number that each mesa
94    * instruction starts at, will be used to fix up labels after
95    * translation.
96    */
97   unsigned *insn;
98   unsigned insn_size;
99   unsigned insn_count;
100
101   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
102
103   boolean error;
104};
105
106
107/**
108 * Make note of a branch to a label in the TGSI code.
109 * After we've emitted all instructions, we'll go over the list
110 * of labels built here and patch the TGSI code with the actual
111 * location of each label.
112 */
113static unsigned *get_label( struct st_translate *t,
114                            unsigned branch_target )
115{
116   unsigned i;
117
118   if (t->labels_count + 1 >= t->labels_size) {
119      unsigned old_size = t->labels_size;
120      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
121      t->labels = REALLOC( t->labels,
122                           old_size * sizeof t->labels[0],
123                           t->labels_size * sizeof t->labels[0] );
124      if (t->labels == NULL) {
125         static unsigned dummy;
126         t->error = TRUE;
127         return &dummy;
128      }
129   }
130
131   i = t->labels_count++;
132   t->labels[i].branch_target = branch_target;
133   return &t->labels[i].token;
134}
135
136
137/**
138 * Called prior to emitting the TGSI code for each Mesa instruction.
139 * Allocate additional space for instructions if needed.
140 * Update the insn[] array so the next Mesa instruction points to
141 * the next TGSI instruction.
142 */
143static void set_insn_start( struct st_translate *t,
144                            unsigned start )
145{
146   if (t->insn_count + 1 >= t->insn_size) {
147      unsigned old_size = t->insn_size;
148      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
149      t->insn = REALLOC( t->insn,
150                         old_size * sizeof t->insn[0],
151                         t->insn_size * sizeof t->insn[0] );
152      if (t->insn == NULL) {
153         t->error = TRUE;
154         return;
155      }
156   }
157
158   t->insn[t->insn_count++] = start;
159}
160
161
162/**
163 * Map a Mesa dst register to a TGSI ureg_dst register.
164 */
165static struct ureg_dst
166dst_register( struct st_translate *t,
167              gl_register_file file,
168              GLuint index )
169{
170   switch( file ) {
171   case PROGRAM_UNDEFINED:
172      return ureg_dst_undef();
173
174   case PROGRAM_TEMPORARY:
175      if (ureg_dst_is_undef(t->temps[index]))
176         t->temps[index] = ureg_DECL_temporary( t->ureg );
177
178      return t->temps[index];
179
180   case PROGRAM_OUTPUT:
181      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
182         t->prevInstWrotePointSize = GL_TRUE;
183
184      if (t->procType == TGSI_PROCESSOR_VERTEX)
185         assert(index < VERT_RESULT_MAX);
186      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
187         assert(index < FRAG_RESULT_MAX);
188      else
189         assert(index < GEOM_RESULT_MAX);
190
191      assert(t->outputMapping[index] < Elements(t->outputs));
192
193      return t->outputs[t->outputMapping[index]];
194
195   case PROGRAM_ADDRESS:
196      return t->address[index];
197
198   default:
199      debug_assert( 0 );
200      return ureg_dst_undef();
201   }
202}
203
204
205/**
206 * Map a Mesa src register to a TGSI ureg_src register.
207 */
208static struct ureg_src
209src_register( struct st_translate *t,
210              gl_register_file file,
211              GLint index )
212{
213   switch( file ) {
214   case PROGRAM_UNDEFINED:
215      return ureg_src_undef();
216
217   case PROGRAM_TEMPORARY:
218      assert(index >= 0);
219      if (ureg_dst_is_undef(t->temps[index]))
220         t->temps[index] = ureg_DECL_temporary( t->ureg );
221      assert(index < Elements(t->temps));
222      return ureg_src(t->temps[index]);
223
224   case PROGRAM_NAMED_PARAM:
225   case PROGRAM_ENV_PARAM:
226   case PROGRAM_LOCAL_PARAM:
227   case PROGRAM_UNIFORM:
228      assert(index >= 0);
229      return t->constants[index];
230   case PROGRAM_STATE_VAR:
231   case PROGRAM_CONSTANT:       /* ie, immediate */
232      if (index < 0)
233         return ureg_DECL_constant( t->ureg, 0 );
234      else
235         return t->constants[index];
236
237   case PROGRAM_INPUT:
238      assert(t->inputMapping[index] < Elements(t->inputs));
239      return t->inputs[t->inputMapping[index]];
240
241   case PROGRAM_OUTPUT:
242      assert(t->outputMapping[index] < Elements(t->outputs));
243      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
244
245   case PROGRAM_ADDRESS:
246      return ureg_src(t->address[index]);
247
248   default:
249      debug_assert( 0 );
250      return ureg_src_undef();
251   }
252}
253
254
255/**
256 * Map mesa texture target to TGSI texture target.
257 */
258static unsigned
259translate_texture_target( GLuint textarget,
260                          GLboolean shadow )
261{
262   if (shadow) {
263      switch( textarget ) {
264      case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_SHADOW1D;
265      case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_SHADOW2D;
266      case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
267      default: break;
268      }
269   }
270
271   switch( textarget ) {
272   case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
273   case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
274   case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
275   case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
276   case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
277   default:
278      debug_assert( 0 );
279      return TGSI_TEXTURE_1D;
280   }
281}
282
283
284/**
285 * Create a TGSI ureg_dst register from a Mesa dest register.
286 */
287static struct ureg_dst
288translate_dst( struct st_translate *t,
289               const struct prog_dst_register *DstReg,
290               boolean saturate )
291{
292   struct ureg_dst dst = dst_register( t,
293                                       DstReg->File,
294                                       DstReg->Index );
295
296   dst = ureg_writemask( dst,
297                         DstReg->WriteMask );
298
299   if (saturate)
300      dst = ureg_saturate( dst );
301
302   if (DstReg->RelAddr)
303      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
304
305   return dst;
306}
307
308
309/**
310 * Create a TGSI ureg_src register from a Mesa src register.
311 */
312static struct ureg_src
313translate_src( struct st_translate *t,
314               const struct prog_src_register *SrcReg )
315{
316   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
317
318   if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) {
319      src = src_register( t, SrcReg->File, SrcReg->Index2 );
320      if (SrcReg->RelAddr2)
321         src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]),
322                                            SrcReg->Index);
323      else
324         src = ureg_src_dimension( src, SrcReg->Index);
325   }
326
327   src = ureg_swizzle( src,
328                       GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
329                       GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
330                       GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
331                       GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
332
333   if (SrcReg->Negate == NEGATE_XYZW)
334      src = ureg_negate(src);
335
336   if (SrcReg->Abs)
337      src = ureg_abs(src);
338
339   if (SrcReg->RelAddr) {
340      src = ureg_src_indirect( src, ureg_src(t->address[0]));
341      if (SrcReg->File != PROGRAM_INPUT &&
342          SrcReg->File != PROGRAM_OUTPUT) {
343         /* If SrcReg->Index was negative, it was set to zero in
344          * src_register().  Reassign it now.  But don't do this
345          * for input/output regs since they get remapped while
346          * const buffers don't.
347          */
348         src.Index = SrcReg->Index;
349      }
350   }
351
352   return src;
353}
354
355
356static struct ureg_src swizzle_4v( struct ureg_src src,
357                                   const unsigned *swz )
358{
359   return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
360}
361
362
363/**
364 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
365 *
366 *   SWZ dst, src.x-y10
367 *
368 * becomes:
369 *
370 *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
371 */
372static void emit_swz( struct st_translate *t,
373                      struct ureg_dst dst,
374                      const struct prog_src_register *SrcReg )
375{
376   struct ureg_program *ureg = t->ureg;
377   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
378
379   unsigned negate_mask =  SrcReg->Negate;
380
381   unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
382                        (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
383                        (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
384                        (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
385
386   unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
387                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
388                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
389                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
390
391   unsigned negative_one_mask = one_mask & negate_mask;
392   unsigned positive_one_mask = one_mask & ~negate_mask;
393
394   struct ureg_src imm;
395   unsigned i;
396   unsigned mul_swizzle[4] = {0,0,0,0};
397   unsigned add_swizzle[4] = {0,0,0,0};
398   unsigned src_swizzle[4] = {0,0,0,0};
399   boolean need_add = FALSE;
400   boolean need_mul = FALSE;
401
402   if (dst.WriteMask == 0)
403      return;
404
405   /* Is this just a MOV?
406    */
407   if (zero_mask == 0 &&
408       one_mask == 0 &&
409       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
410   {
411      ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
412      return;
413   }
414
415#define IMM_ZERO    0
416#define IMM_ONE     1
417#define IMM_NEG_ONE 2
418
419   imm = ureg_imm3f( ureg, 0, 1, -1 );
420
421   for (i = 0; i < 4; i++) {
422      unsigned bit = 1 << i;
423
424      if (dst.WriteMask & bit) {
425         if (positive_one_mask & bit) {
426            mul_swizzle[i] = IMM_ZERO;
427            add_swizzle[i] = IMM_ONE;
428            need_add = TRUE;
429         }
430         else if (negative_one_mask & bit) {
431            mul_swizzle[i] = IMM_ZERO;
432            add_swizzle[i] = IMM_NEG_ONE;
433            need_add = TRUE;
434         }
435         else if (zero_mask & bit) {
436            mul_swizzle[i] = IMM_ZERO;
437            add_swizzle[i] = IMM_ZERO;
438            need_add = TRUE;
439         }
440         else {
441            add_swizzle[i] = IMM_ZERO;
442            src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
443            need_mul = TRUE;
444            if (negate_mask & bit) {
445               mul_swizzle[i] = IMM_NEG_ONE;
446            }
447            else {
448               mul_swizzle[i] = IMM_ONE;
449            }
450         }
451      }
452   }
453
454   if (need_mul && need_add) {
455      ureg_MAD( ureg,
456                dst,
457                swizzle_4v( src, src_swizzle ),
458                swizzle_4v( imm, mul_swizzle ),
459                swizzle_4v( imm, add_swizzle ) );
460   }
461   else if (need_mul) {
462      ureg_MUL( ureg,
463                dst,
464                swizzle_4v( src, src_swizzle ),
465                swizzle_4v( imm, mul_swizzle ) );
466   }
467   else if (need_add) {
468      ureg_MOV( ureg,
469                dst,
470                swizzle_4v( imm, add_swizzle ) );
471   }
472   else {
473      debug_assert(0);
474   }
475
476#undef IMM_ZERO
477#undef IMM_ONE
478#undef IMM_NEG_ONE
479}
480
481
482/**
483 * Negate the value of DDY to match GL semantics where (0,0) is the
484 * lower-left corner of the window.
485 * Note that the GL_ARB_fragment_coord_conventions extension will
486 * effect this someday.
487 */
488static void emit_ddy( struct st_translate *t,
489                      struct ureg_dst dst,
490                      const struct prog_src_register *SrcReg )
491{
492   struct ureg_program *ureg = t->ureg;
493   struct ureg_src src = translate_src( t, SrcReg );
494   src = ureg_negate( src );
495   ureg_DDY( ureg, dst, src );
496}
497
498
499
500static unsigned
501translate_opcode( unsigned op )
502{
503   switch( op ) {
504   case OPCODE_ARL:
505      return TGSI_OPCODE_ARL;
506   case OPCODE_ABS:
507      return TGSI_OPCODE_ABS;
508   case OPCODE_ADD:
509      return TGSI_OPCODE_ADD;
510   case OPCODE_BGNLOOP:
511      return TGSI_OPCODE_BGNLOOP;
512   case OPCODE_BGNSUB:
513      return TGSI_OPCODE_BGNSUB;
514   case OPCODE_BRA:
515      return TGSI_OPCODE_BRA;
516   case OPCODE_BRK:
517      return TGSI_OPCODE_BRK;
518   case OPCODE_CAL:
519      return TGSI_OPCODE_CAL;
520   case OPCODE_CMP:
521      return TGSI_OPCODE_CMP;
522   case OPCODE_CONT:
523      return TGSI_OPCODE_CONT;
524   case OPCODE_COS:
525      return TGSI_OPCODE_COS;
526   case OPCODE_DDX:
527      return TGSI_OPCODE_DDX;
528   case OPCODE_DDY:
529      return TGSI_OPCODE_DDY;
530   case OPCODE_DP2:
531      return TGSI_OPCODE_DP2;
532   case OPCODE_DP2A:
533      return TGSI_OPCODE_DP2A;
534   case OPCODE_DP3:
535      return TGSI_OPCODE_DP3;
536   case OPCODE_DP4:
537      return TGSI_OPCODE_DP4;
538   case OPCODE_DPH:
539      return TGSI_OPCODE_DPH;
540   case OPCODE_DST:
541      return TGSI_OPCODE_DST;
542   case OPCODE_ELSE:
543      return TGSI_OPCODE_ELSE;
544   case OPCODE_EMIT_VERTEX:
545      return TGSI_OPCODE_EMIT;
546   case OPCODE_END_PRIMITIVE:
547      return TGSI_OPCODE_ENDPRIM;
548   case OPCODE_ENDIF:
549      return TGSI_OPCODE_ENDIF;
550   case OPCODE_ENDLOOP:
551      return TGSI_OPCODE_ENDLOOP;
552   case OPCODE_ENDSUB:
553      return TGSI_OPCODE_ENDSUB;
554   case OPCODE_EX2:
555      return TGSI_OPCODE_EX2;
556   case OPCODE_EXP:
557      return TGSI_OPCODE_EXP;
558   case OPCODE_FLR:
559      return TGSI_OPCODE_FLR;
560   case OPCODE_FRC:
561      return TGSI_OPCODE_FRC;
562   case OPCODE_IF:
563      return TGSI_OPCODE_IF;
564   case OPCODE_TRUNC:
565      return TGSI_OPCODE_TRUNC;
566   case OPCODE_KIL:
567      return TGSI_OPCODE_KIL;
568   case OPCODE_KIL_NV:
569      return TGSI_OPCODE_KILP;
570   case OPCODE_LG2:
571      return TGSI_OPCODE_LG2;
572   case OPCODE_LOG:
573      return TGSI_OPCODE_LOG;
574   case OPCODE_LIT:
575      return TGSI_OPCODE_LIT;
576   case OPCODE_LRP:
577      return TGSI_OPCODE_LRP;
578   case OPCODE_MAD:
579      return TGSI_OPCODE_MAD;
580   case OPCODE_MAX:
581      return TGSI_OPCODE_MAX;
582   case OPCODE_MIN:
583      return TGSI_OPCODE_MIN;
584   case OPCODE_MOV:
585      return TGSI_OPCODE_MOV;
586   case OPCODE_MUL:
587      return TGSI_OPCODE_MUL;
588   case OPCODE_NOP:
589      return TGSI_OPCODE_NOP;
590   case OPCODE_NRM3:
591      return TGSI_OPCODE_NRM;
592   case OPCODE_NRM4:
593      return TGSI_OPCODE_NRM4;
594   case OPCODE_POW:
595      return TGSI_OPCODE_POW;
596   case OPCODE_RCP:
597      return TGSI_OPCODE_RCP;
598   case OPCODE_RET:
599      return TGSI_OPCODE_RET;
600   case OPCODE_RSQ:
601      return TGSI_OPCODE_RSQ;
602   case OPCODE_SCS:
603      return TGSI_OPCODE_SCS;
604   case OPCODE_SEQ:
605      return TGSI_OPCODE_SEQ;
606   case OPCODE_SGE:
607      return TGSI_OPCODE_SGE;
608   case OPCODE_SGT:
609      return TGSI_OPCODE_SGT;
610   case OPCODE_SIN:
611      return TGSI_OPCODE_SIN;
612   case OPCODE_SLE:
613      return TGSI_OPCODE_SLE;
614   case OPCODE_SLT:
615      return TGSI_OPCODE_SLT;
616   case OPCODE_SNE:
617      return TGSI_OPCODE_SNE;
618   case OPCODE_SSG:
619      return TGSI_OPCODE_SSG;
620   case OPCODE_SUB:
621      return TGSI_OPCODE_SUB;
622   case OPCODE_TEX:
623      return TGSI_OPCODE_TEX;
624   case OPCODE_TXB:
625      return TGSI_OPCODE_TXB;
626   case OPCODE_TXD:
627      return TGSI_OPCODE_TXD;
628   case OPCODE_TXL:
629      return TGSI_OPCODE_TXL;
630   case OPCODE_TXP:
631      return TGSI_OPCODE_TXP;
632   case OPCODE_XPD:
633      return TGSI_OPCODE_XPD;
634   case OPCODE_END:
635      return TGSI_OPCODE_END;
636   default:
637      debug_assert( 0 );
638      return TGSI_OPCODE_NOP;
639   }
640}
641
642
643static void
644compile_instruction(
645   struct st_translate *t,
646   const struct prog_instruction *inst )
647{
648   struct ureg_program *ureg = t->ureg;
649   GLuint i;
650   struct ureg_dst dst[1];
651   struct ureg_src src[4];
652   unsigned num_dst;
653   unsigned num_src;
654
655   num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
656   num_src = _mesa_num_inst_src_regs( inst->Opcode );
657
658   if (num_dst)
659      dst[0] = translate_dst( t,
660                              &inst->DstReg,
661                              inst->SaturateMode );
662
663   for (i = 0; i < num_src; i++)
664      src[i] = translate_src( t, &inst->SrcReg[i] );
665
666   switch( inst->Opcode ) {
667   case OPCODE_SWZ:
668      emit_swz( t, dst[0], &inst->SrcReg[0] );
669      return;
670
671   case OPCODE_BGNLOOP:
672   case OPCODE_CAL:
673   case OPCODE_ELSE:
674   case OPCODE_ENDLOOP:
675   case OPCODE_IF:
676      debug_assert(num_dst == 0);
677      ureg_label_insn( ureg,
678                       translate_opcode( inst->Opcode ),
679                       src, num_src,
680                       get_label( t, inst->BranchTarget ));
681      return;
682
683   case OPCODE_TEX:
684   case OPCODE_TXB:
685   case OPCODE_TXD:
686   case OPCODE_TXL:
687   case OPCODE_TXP:
688      src[num_src++] = t->samplers[inst->TexSrcUnit];
689      ureg_tex_insn( ureg,
690                     translate_opcode( inst->Opcode ),
691                     dst, num_dst,
692                     translate_texture_target( inst->TexSrcTarget,
693                                               inst->TexShadow ),
694                     src, num_src );
695      return;
696
697   case OPCODE_SCS:
698      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
699      ureg_insn( ureg,
700                 translate_opcode( inst->Opcode ),
701                 dst, num_dst,
702                 src, num_src );
703      break;
704
705   case OPCODE_XPD:
706      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
707      ureg_insn( ureg,
708                 translate_opcode( inst->Opcode ),
709                 dst, num_dst,
710                 src, num_src );
711      break;
712
713   case OPCODE_NOISE1:
714   case OPCODE_NOISE2:
715   case OPCODE_NOISE3:
716   case OPCODE_NOISE4:
717      /* At some point, a motivated person could add a better
718       * implementation of noise.  Currently not even the nvidia
719       * binary drivers do anything more than this.  In any case, the
720       * place to do this is in the GL state tracker, not the poor
721       * driver.
722       */
723      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
724      break;
725
726   case OPCODE_DDY:
727      emit_ddy( t, dst[0], &inst->SrcReg[0] );
728      break;
729
730   default:
731      ureg_insn( ureg,
732                 translate_opcode( inst->Opcode ),
733                 dst, num_dst,
734                 src, num_src );
735      break;
736   }
737}
738
739
740/**
741 * Emit the TGSI instructions to adjust the WPOS pixel center convention
742 */
743static void
744emit_adjusted_wpos( struct st_translate *t,
745                    const struct gl_program *program, GLfloat value)
746{
747   struct ureg_program *ureg = t->ureg;
748   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
749   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
750
751   /* Note that we bias X and Y and pass Z and W through unchanged.
752    * The shader might also use gl_FragCoord.w and .z.
753    */
754   ureg_ADD(ureg, wpos_temp, wpos_input,
755            ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
756
757   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
758}
759
760
761/**
762 * Emit the TGSI instructions for inverting the WPOS y coordinate.
763 * This code is unavoidable because it also depends on whether
764 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
765 */
766static void
767emit_wpos_inversion( struct st_translate *t,
768                     const struct gl_program *program,
769                     boolean invert)
770{
771   struct ureg_program *ureg = t->ureg;
772
773   /* Fragment program uses fragment position input.
774    * Need to replace instances of INPUT[WPOS] with temp T
775    * where T = INPUT[WPOS] by y is inverted.
776    */
777   static const gl_state_index wposTransformState[STATE_LENGTH]
778      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
779
780   /* XXX: note we are modifying the incoming shader here!  Need to
781    * do this before emitting the constant decls below, or this
782    * will be missed:
783    */
784   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
785                                                       wposTransformState);
786
787   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
788   struct ureg_dst wpos_temp;
789   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
790
791   /* MOV wpos_temp, input[wpos]
792    */
793   if (wpos_input.File == TGSI_FILE_TEMPORARY)
794      wpos_temp = ureg_dst(wpos_input);
795   else {
796      wpos_temp = ureg_DECL_temporary( ureg );
797      ureg_MOV( ureg, wpos_temp, wpos_input );
798   }
799
800   if (invert) {
801      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
802       */
803      ureg_MAD( ureg,
804                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
805                wpos_input,
806                ureg_scalar(wpostrans, 0),
807                ureg_scalar(wpostrans, 1));
808   } else {
809      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
810       */
811      ureg_MAD( ureg,
812                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
813                wpos_input,
814                ureg_scalar(wpostrans, 2),
815                ureg_scalar(wpostrans, 3));
816   }
817
818   /* Use wpos_temp as position input from here on:
819    */
820   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
821}
822
823
824/**
825 * Emit fragment position/ooordinate code.
826 */
827static void
828emit_wpos(struct st_context *st,
829          struct st_translate *t,
830          const struct gl_program *program,
831          struct ureg_program *ureg)
832{
833   const struct gl_fragment_program *fp =
834      (const struct gl_fragment_program *) program;
835   struct pipe_screen *pscreen = st->pipe->screen;
836   boolean invert = FALSE;
837
838   if (fp->OriginUpperLeft) {
839      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
840      }
841      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
842         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
843         invert = TRUE;
844      }
845      else
846         assert(0);
847   }
848   else {
849      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
850         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
851      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
852         invert = TRUE;
853      else
854         assert(0);
855   }
856
857   if (fp->PixelCenterInteger) {
858      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
859         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
860      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
861         emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
862      else
863         assert(0);
864   }
865   else {
866      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
867      }
868      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
869         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
870         emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
871      }
872      else
873         assert(0);
874   }
875
876   /* we invert after adjustment so that we avoid the MOV to temporary,
877    * and reuse the adjustment ADD instead */
878   emit_wpos_inversion(t, program, invert);
879}
880
881
882/**
883 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
884 * TGSI uses +1 for front, -1 for back.
885 * This function converts the TGSI value to the GL value.  Simply clamping/
886 * saturating the value to [0,1] does the job.
887 */
888static void
889emit_face_var( struct st_translate *t,
890               const struct gl_program *program )
891{
892   struct ureg_program *ureg = t->ureg;
893   struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
894   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
895
896   /* MOV_SAT face_temp, input[face]
897    */
898   face_temp = ureg_saturate( face_temp );
899   ureg_MOV( ureg, face_temp, face_input );
900
901   /* Use face_temp as face input from here on:
902    */
903   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
904}
905
906
907static void
908emit_edgeflags( struct st_translate *t,
909                 const struct gl_program *program )
910{
911   struct ureg_program *ureg = t->ureg;
912   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
913   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
914
915   ureg_MOV( ureg, edge_dst, edge_src );
916}
917
918
919/**
920 * Translate Mesa program to TGSI format.
921 * \param program  the program to translate
922 * \param numInputs  number of input registers used
923 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
924 *                      input indexes
925 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
926 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
927 *                            each input
928 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
929 * \param numOutputs  number of output registers used
930 * \param outputMapping  maps Mesa fragment program outputs to TGSI
931 *                       generic outputs
932 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
933 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
934 *                             each output
935 *
936 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
937 */
938enum pipe_error
939st_translate_mesa_program(
940   struct gl_context *ctx,
941   uint procType,
942   struct ureg_program *ureg,
943   const struct gl_program *program,
944   GLuint numInputs,
945   const GLuint inputMapping[],
946   const ubyte inputSemanticName[],
947   const ubyte inputSemanticIndex[],
948   const GLuint interpMode[],
949   GLuint numOutputs,
950   const GLuint outputMapping[],
951   const ubyte outputSemanticName[],
952   const ubyte outputSemanticIndex[],
953   boolean passthrough_edgeflags )
954{
955   struct st_translate translate, *t;
956   unsigned i;
957   enum pipe_error ret = PIPE_OK;
958
959   assert(numInputs <= Elements(t->inputs));
960   assert(numOutputs <= Elements(t->outputs));
961
962   t = &translate;
963   memset(t, 0, sizeof *t);
964
965   t->procType = procType;
966   t->inputMapping = inputMapping;
967   t->outputMapping = outputMapping;
968   t->ureg = ureg;
969   t->pointSizeOutIndex = -1;
970   t->prevInstWrotePointSize = GL_FALSE;
971
972   /*_mesa_print_program(program);*/
973
974   /*
975    * Declare input attributes.
976    */
977   if (procType == TGSI_PROCESSOR_FRAGMENT) {
978      for (i = 0; i < numInputs; i++) {
979         if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
980            t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
981                                                  inputSemanticName[i],
982                                                  inputSemanticIndex[i],
983                                                  interpMode[i],
984                                                  TGSI_CYLINDRICAL_WRAP_X);
985         }
986         else {
987            t->inputs[i] = ureg_DECL_fs_input(ureg,
988                                              inputSemanticName[i],
989                                              inputSemanticIndex[i],
990                                              interpMode[i]);
991         }
992      }
993
994      if (program->InputsRead & FRAG_BIT_WPOS) {
995         /* Must do this after setting up t->inputs, and before
996          * emitting constant references, below:
997          */
998         emit_wpos(st_context(ctx), t, program, ureg);
999      }
1000
1001      if (program->InputsRead & FRAG_BIT_FACE) {
1002         emit_face_var( t, program );
1003      }
1004
1005      /*
1006       * Declare output attributes.
1007       */
1008      for (i = 0; i < numOutputs; i++) {
1009         switch (outputSemanticName[i]) {
1010         case TGSI_SEMANTIC_POSITION:
1011            t->outputs[i] = ureg_DECL_output( ureg,
1012                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
1013                                              outputSemanticIndex[i] );
1014
1015            t->outputs[i] = ureg_writemask( t->outputs[i],
1016                                            TGSI_WRITEMASK_Z );
1017            break;
1018         case TGSI_SEMANTIC_STENCIL:
1019            t->outputs[i] = ureg_DECL_output( ureg,
1020                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
1021                                              outputSemanticIndex[i] );
1022            t->outputs[i] = ureg_writemask( t->outputs[i],
1023                                            TGSI_WRITEMASK_Y );
1024            break;
1025         case TGSI_SEMANTIC_COLOR:
1026            t->outputs[i] = ureg_DECL_output( ureg,
1027                                              TGSI_SEMANTIC_COLOR,
1028                                              outputSemanticIndex[i] );
1029            break;
1030         default:
1031            debug_assert(0);
1032            return 0;
1033         }
1034      }
1035   }
1036   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
1037      for (i = 0; i < numInputs; i++) {
1038         t->inputs[i] = ureg_DECL_gs_input(ureg,
1039                                           i,
1040                                           inputSemanticName[i],
1041                                           inputSemanticIndex[i]);
1042      }
1043
1044      for (i = 0; i < numOutputs; i++) {
1045         t->outputs[i] = ureg_DECL_output( ureg,
1046                                           outputSemanticName[i],
1047                                           outputSemanticIndex[i] );
1048      }
1049   }
1050   else {
1051      assert(procType == TGSI_PROCESSOR_VERTEX);
1052
1053      for (i = 0; i < numInputs; i++) {
1054         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
1055      }
1056
1057      for (i = 0; i < numOutputs; i++) {
1058         t->outputs[i] = ureg_DECL_output( ureg,
1059                                           outputSemanticName[i],
1060                                           outputSemanticIndex[i] );
1061         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
1062            /* Writing to the point size result register requires special
1063             * handling to implement clamping.
1064             */
1065            static const gl_state_index pointSizeClampState[STATE_LENGTH]
1066               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
1067               /* XXX: note we are modifying the incoming shader here!  Need to
1068               * do this before emitting the constant decls below, or this
1069               * will be missed:
1070               */
1071            unsigned pointSizeClampConst =
1072               _mesa_add_state_reference(program->Parameters,
1073                                         pointSizeClampState);
1074            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
1075            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
1076            t->pointSizeResult = t->outputs[i];
1077            t->pointSizeOutIndex = i;
1078            t->outputs[i] = psizregtemp;
1079         }
1080      }
1081      if (passthrough_edgeflags)
1082         emit_edgeflags( t, program );
1083   }
1084
1085   /* Declare address register.
1086    */
1087   if (program->NumAddressRegs > 0) {
1088      debug_assert( program->NumAddressRegs == 1 );
1089      t->address[0] = ureg_DECL_address( ureg );
1090   }
1091
1092   if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
1093      /* If temps are accessed with indirect addressing, declare temporaries
1094       * in sequential order.  Else, we declare them on demand elsewhere.
1095       */
1096      for (i = 0; i < program->NumTemporaries; i++) {
1097         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
1098         t->temps[i] = ureg_DECL_temporary( t->ureg );
1099      }
1100   }
1101
1102   /* Emit constants and immediates.  Mesa uses a single index space
1103    * for these, so we put all the translated regs in t->constants.
1104    */
1105   if (program->Parameters) {
1106      t->constants = CALLOC( program->Parameters->NumParameters,
1107                             sizeof t->constants[0] );
1108      if (t->constants == NULL) {
1109         ret = PIPE_ERROR_OUT_OF_MEMORY;
1110         goto out;
1111      }
1112
1113      for (i = 0; i < program->Parameters->NumParameters; i++) {
1114         switch (program->Parameters->Parameters[i].Type) {
1115         case PROGRAM_ENV_PARAM:
1116         case PROGRAM_LOCAL_PARAM:
1117         case PROGRAM_STATE_VAR:
1118         case PROGRAM_NAMED_PARAM:
1119         case PROGRAM_UNIFORM:
1120            t->constants[i] = ureg_DECL_constant( ureg, i );
1121            break;
1122
1123            /* Emit immediates only when there's no indirect addressing of
1124             * the const buffer.
1125             * FIXME: Be smarter and recognize param arrays:
1126             * indirect addressing is only valid within the referenced
1127             * array.
1128             */
1129         case PROGRAM_CONSTANT:
1130            if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
1131               t->constants[i] = ureg_DECL_constant( ureg, i );
1132            else
1133               t->constants[i] =
1134                  ureg_DECL_immediate( ureg,
1135                                       program->Parameters->ParameterValues[i],
1136                                       4 );
1137            break;
1138         default:
1139            break;
1140         }
1141      }
1142   }
1143
1144   /* texture samplers */
1145   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1146      if (program->SamplersUsed & (1 << i)) {
1147         t->samplers[i] = ureg_DECL_sampler( ureg, i );
1148      }
1149   }
1150
1151   /* Emit each instruction in turn:
1152    */
1153   for (i = 0; i < program->NumInstructions; i++) {
1154      set_insn_start( t, ureg_get_instruction_number( ureg ));
1155      compile_instruction( t, &program->Instructions[i] );
1156
1157      if (t->prevInstWrotePointSize && program->Id) {
1158         /* The previous instruction wrote to the (fake) vertex point size
1159          * result register.  Now we need to clamp that value to the min/max
1160          * point size range, putting the result into the real point size
1161          * register.
1162          * Note that we can't do this easily at the end of program due to
1163          * possible early return.
1164          */
1165         set_insn_start( t, ureg_get_instruction_number( ureg ));
1166         ureg_MAX( t->ureg,
1167                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
1168                   ureg_src(t->outputs[t->pointSizeOutIndex]),
1169                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1170         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
1171                   ureg_src(t->outputs[t->pointSizeOutIndex]),
1172                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1173      }
1174      t->prevInstWrotePointSize = GL_FALSE;
1175   }
1176
1177   /* Fix up all emitted labels:
1178    */
1179   for (i = 0; i < t->labels_count; i++) {
1180      ureg_fixup_label( ureg,
1181                        t->labels[i].token,
1182                        t->insn[t->labels[i].branch_target] );
1183   }
1184
1185out:
1186   FREE(t->insn);
1187   FREE(t->labels);
1188   FREE(t->constants);
1189
1190   if (t->error) {
1191      debug_printf("%s: translate error flag set\n", __FUNCTION__);
1192   }
1193
1194   return ret;
1195}
1196
1197
/**
 * Release a TGSI token array.
 *
 * Tokens must be released through gallium's FREE() instead of plain
 * free(); otherwise the builtin gallium malloc debugging gets confused.
 *
 * \param tokens  the token array to release
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   /* Drop the const qualifier so the pointer can be handed to FREE(). */
   void *mem = (void *) tokens;

   FREE(mem);
}
1207