st_mesa_to_tgsi.c revision 2dfd348e33f0152e3ab693ec3b53911331f5c349
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34#include "pipe/p_compiler.h"
35#include "pipe/p_context.h"
36#include "pipe/p_screen.h"
37#include "pipe/p_shader_tokens.h"
38#include "pipe/p_state.h"
39#include "tgsi/tgsi_ureg.h"
40#include "st_mesa_to_tgsi.h"
41#include "st_context.h"
42#include "program/prog_instruction.h"
43#include "program/prog_parameter.h"
44#include "util/u_debug.h"
45#include "util/u_math.h"
46#include "util/u_memory.h"
47
48
/**
 * Bitmask with one bit set for each of the parameter/constant register
 * files listed below; the bit position is the register file's enum value.
 */
#define PROGRAM_ANY_CONST                                        \
   ((1 << PROGRAM_LOCAL_PARAM) | (1 << PROGRAM_ENV_PARAM)   |    \
    (1 << PROGRAM_STATE_VAR)   | (1 << PROGRAM_NAMED_PARAM) |    \
    (1 << PROGRAM_CONSTANT)    | (1 << PROGRAM_UNIFORM))
55
56
/**
 * One recorded branch: the branch target handed to get_label() together
 * with the token slot whose address get_label() returns to its caller.
 */
struct label {
   unsigned branch_target;   /**< value of get_label()'s branch_target argument */
   unsigned token;           /**< slot filled in by whoever receives its address */
};
61
62
63/**
64 * Intermediate state used during shader translation.
65 */
66struct st_translate {
67   struct ureg_program *ureg;
68
69   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
70   struct ureg_src *constants;
71   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
72   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
73   struct ureg_dst address[1];
74   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
75
76   /* Extra info for handling point size clamping in vertex shader */
77   struct ureg_dst pointSizeResult; /**< Actual point size output register */
78   struct ureg_src pointSizeConst;  /**< Point size range constant register */
79   GLint pointSizeOutIndex;         /**< Temp point size output register */
80   GLboolean prevInstWrotePointSize;
81
82   const GLuint *inputMapping;
83   const GLuint *outputMapping;
84
85   /* For every instruction that contains a label (eg CALL), keep
86    * details so that we can go back afterwards and emit the correct
87    * tgsi instruction number for each label.
88    */
89   struct label *labels;
90   unsigned labels_size;
91   unsigned labels_count;
92
93   /* Keep a record of the tgsi instruction number that each mesa
94    * instruction starts at, will be used to fix up labels after
95    * translation.
96    */
97   unsigned *insn;
98   unsigned insn_size;
99   unsigned insn_count;
100
101   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
102
103   boolean error;
104};
105
106
107/**
108 * Make note of a branch to a label in the TGSI code.
109 * After we've emitted all instructions, we'll go over the list
110 * of labels built here and patch the TGSI code with the actual
111 * location of each label.
112 */
113static unsigned *get_label( struct st_translate *t,
114                            unsigned branch_target )
115{
116   unsigned i;
117
118   if (t->labels_count + 1 >= t->labels_size) {
119      unsigned old_size = t->labels_size;
120      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
121      t->labels = REALLOC( t->labels,
122                           old_size * sizeof t->labels[0],
123                           t->labels_size * sizeof t->labels[0] );
124      if (t->labels == NULL) {
125         static unsigned dummy;
126         t->error = TRUE;
127         return &dummy;
128      }
129   }
130
131   i = t->labels_count++;
132   t->labels[i].branch_target = branch_target;
133   return &t->labels[i].token;
134}
135
136
137/**
138 * Called prior to emitting the TGSI code for each Mesa instruction.
139 * Allocate additional space for instructions if needed.
140 * Update the insn[] array so the next Mesa instruction points to
141 * the next TGSI instruction.
142 */
143static void set_insn_start( struct st_translate *t,
144                            unsigned start )
145{
146   if (t->insn_count + 1 >= t->insn_size) {
147      unsigned old_size = t->insn_size;
148      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
149      t->insn = REALLOC( t->insn,
150                         old_size * sizeof t->insn[0],
151                         t->insn_size * sizeof t->insn[0] );
152      if (t->insn == NULL) {
153         t->error = TRUE;
154         return;
155      }
156   }
157
158   t->insn[t->insn_count++] = start;
159}
160
161
162/**
163 * Map a Mesa dst register to a TGSI ureg_dst register.
164 */
165static struct ureg_dst
166dst_register( struct st_translate *t,
167              gl_register_file file,
168              GLuint index )
169{
170   switch( file ) {
171   case PROGRAM_UNDEFINED:
172      return ureg_dst_undef();
173
174   case PROGRAM_TEMPORARY:
175      if (ureg_dst_is_undef(t->temps[index]))
176         t->temps[index] = ureg_DECL_temporary( t->ureg );
177
178      return t->temps[index];
179
180   case PROGRAM_OUTPUT:
181      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
182         t->prevInstWrotePointSize = GL_TRUE;
183
184      if (t->procType == TGSI_PROCESSOR_VERTEX)
185         assert(index < VERT_RESULT_MAX);
186      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
187         assert(index < FRAG_RESULT_MAX);
188      else
189         assert(index < GEOM_RESULT_MAX);
190
191      assert(t->outputMapping[index] < Elements(t->outputs));
192
193      return t->outputs[t->outputMapping[index]];
194
195   case PROGRAM_ADDRESS:
196      return t->address[index];
197
198   default:
199      debug_assert( 0 );
200      return ureg_dst_undef();
201   }
202}
203
204
205/**
206 * Map a Mesa src register to a TGSI ureg_src register.
207 */
208static struct ureg_src
209src_register( struct st_translate *t,
210              gl_register_file file,
211              GLint index )
212{
213   switch( file ) {
214   case PROGRAM_UNDEFINED:
215      return ureg_src_undef();
216
217   case PROGRAM_TEMPORARY:
218      assert(index >= 0);
219      if (ureg_dst_is_undef(t->temps[index]))
220         t->temps[index] = ureg_DECL_temporary( t->ureg );
221      assert(index < Elements(t->temps));
222      return ureg_src(t->temps[index]);
223
224   case PROGRAM_NAMED_PARAM:
225   case PROGRAM_ENV_PARAM:
226   case PROGRAM_LOCAL_PARAM:
227   case PROGRAM_UNIFORM:
228      assert(index >= 0);
229      return t->constants[index];
230   case PROGRAM_STATE_VAR:
231   case PROGRAM_CONSTANT:       /* ie, immediate */
232      if (index < 0)
233         return ureg_DECL_constant( t->ureg, 0 );
234      else
235         return t->constants[index];
236
237   case PROGRAM_INPUT:
238      assert(t->inputMapping[index] < Elements(t->inputs));
239      return t->inputs[t->inputMapping[index]];
240
241   case PROGRAM_OUTPUT:
242      assert(t->outputMapping[index] < Elements(t->outputs));
243      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
244
245   case PROGRAM_ADDRESS:
246      return ureg_src(t->address[index]);
247
248   default:
249      debug_assert( 0 );
250      return ureg_src_undef();
251   }
252}
253
254
255/**
256 * Map mesa texture target to TGSI texture target.
257 */
258static unsigned
259translate_texture_target( GLuint textarget,
260                          GLboolean shadow )
261{
262   if (shadow) {
263      switch( textarget ) {
264      case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_SHADOW1D;
265      case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_SHADOW2D;
266      case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
267      default: break;
268      }
269   }
270
271   switch( textarget ) {
272   case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
273   case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
274   case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
275   case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
276   case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
277   default:
278      debug_assert( 0 );
279      return TGSI_TEXTURE_1D;
280   }
281}
282
283
284/**
285 * Create a TGSI ureg_dst register from a Mesa dest register.
286 */
287static struct ureg_dst
288translate_dst( struct st_translate *t,
289               const struct prog_dst_register *DstReg,
290               boolean saturate )
291{
292   struct ureg_dst dst = dst_register( t,
293                                       DstReg->File,
294                                       DstReg->Index );
295
296   dst = ureg_writemask( dst,
297                         DstReg->WriteMask );
298
299   if (saturate)
300      dst = ureg_saturate( dst );
301
302   if (DstReg->RelAddr)
303      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
304
305   return dst;
306}
307
308
309/**
310 * Create a TGSI ureg_src register from a Mesa src register.
311 */
312static struct ureg_src
313translate_src( struct st_translate *t,
314               const struct prog_src_register *SrcReg )
315{
316   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
317
318   if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) {
319      src = src_register( t, SrcReg->File, SrcReg->Index2 );
320      if (SrcReg->RelAddr2)
321         src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]),
322                                            SrcReg->Index);
323      else
324         src = ureg_src_dimension( src, SrcReg->Index);
325   }
326
327   src = ureg_swizzle( src,
328                       GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
329                       GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
330                       GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
331                       GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
332
333   if (SrcReg->Negate == NEGATE_XYZW)
334      src = ureg_negate(src);
335
336   if (SrcReg->Abs)
337      src = ureg_abs(src);
338
339   if (SrcReg->RelAddr) {
340      src = ureg_src_indirect( src, ureg_src(t->address[0]));
341      if (SrcReg->File != PROGRAM_INPUT &&
342          SrcReg->File != PROGRAM_OUTPUT) {
343         /* If SrcReg->Index was negative, it was set to zero in
344          * src_register().  Reassign it now.  But don't do this
345          * for input/output regs since they get remapped while
346          * const buffers don't.
347          */
348         src.Index = SrcReg->Index;
349      }
350   }
351
352   return src;
353}
354
355
356static struct ureg_src swizzle_4v( struct ureg_src src,
357                                   const unsigned *swz )
358{
359   return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
360}
361
362
363/**
364 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
365 *
366 *   SWZ dst, src.x-y10
367 *
368 * becomes:
369 *
370 *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
371 */
372static void emit_swz( struct st_translate *t,
373                      struct ureg_dst dst,
374                      const struct prog_src_register *SrcReg )
375{
376   struct ureg_program *ureg = t->ureg;
377   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
378
379   unsigned negate_mask =  SrcReg->Negate;
380
381   unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
382                        (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
383                        (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
384                        (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
385
386   unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
387                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
388                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
389                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
390
391   unsigned negative_one_mask = one_mask & negate_mask;
392   unsigned positive_one_mask = one_mask & ~negate_mask;
393
394   struct ureg_src imm;
395   unsigned i;
396   unsigned mul_swizzle[4] = {0,0,0,0};
397   unsigned add_swizzle[4] = {0,0,0,0};
398   unsigned src_swizzle[4] = {0,0,0,0};
399   boolean need_add = FALSE;
400   boolean need_mul = FALSE;
401
402   if (dst.WriteMask == 0)
403      return;
404
405   /* Is this just a MOV?
406    */
407   if (zero_mask == 0 &&
408       one_mask == 0 &&
409       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
410   {
411      ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
412      return;
413   }
414
415#define IMM_ZERO    0
416#define IMM_ONE     1
417#define IMM_NEG_ONE 2
418
419   imm = ureg_imm3f( ureg, 0, 1, -1 );
420
421   for (i = 0; i < 4; i++) {
422      unsigned bit = 1 << i;
423
424      if (dst.WriteMask & bit) {
425         if (positive_one_mask & bit) {
426            mul_swizzle[i] = IMM_ZERO;
427            add_swizzle[i] = IMM_ONE;
428            need_add = TRUE;
429         }
430         else if (negative_one_mask & bit) {
431            mul_swizzle[i] = IMM_ZERO;
432            add_swizzle[i] = IMM_NEG_ONE;
433            need_add = TRUE;
434         }
435         else if (zero_mask & bit) {
436            mul_swizzle[i] = IMM_ZERO;
437            add_swizzle[i] = IMM_ZERO;
438            need_add = TRUE;
439         }
440         else {
441            add_swizzle[i] = IMM_ZERO;
442            src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
443            need_mul = TRUE;
444            if (negate_mask & bit) {
445               mul_swizzle[i] = IMM_NEG_ONE;
446            }
447            else {
448               mul_swizzle[i] = IMM_ONE;
449            }
450         }
451      }
452   }
453
454   if (need_mul && need_add) {
455      ureg_MAD( ureg,
456                dst,
457                swizzle_4v( src, src_swizzle ),
458                swizzle_4v( imm, mul_swizzle ),
459                swizzle_4v( imm, add_swizzle ) );
460   }
461   else if (need_mul) {
462      ureg_MUL( ureg,
463                dst,
464                swizzle_4v( src, src_swizzle ),
465                swizzle_4v( imm, mul_swizzle ) );
466   }
467   else if (need_add) {
468      ureg_MOV( ureg,
469                dst,
470                swizzle_4v( imm, add_swizzle ) );
471   }
472   else {
473      debug_assert(0);
474   }
475
476#undef IMM_ZERO
477#undef IMM_ONE
478#undef IMM_NEG_ONE
479}
480
481
482/**
483 * Negate the value of DDY to match GL semantics where (0,0) is the
484 * lower-left corner of the window.
485 * Note that the GL_ARB_fragment_coord_conventions extension will
486 * effect this someday.
487 */
488static void emit_ddy( struct st_translate *t,
489                      struct ureg_dst dst,
490                      const struct prog_src_register *SrcReg )
491{
492   struct ureg_program *ureg = t->ureg;
493   struct ureg_src src = translate_src( t, SrcReg );
494   src = ureg_negate( src );
495   ureg_DDY( ureg, dst, src );
496}
497
498
499
500static unsigned
501translate_opcode( unsigned op )
502{
503   switch( op ) {
504   case OPCODE_ARL:
505      return TGSI_OPCODE_ARL;
506   case OPCODE_ABS:
507      return TGSI_OPCODE_ABS;
508   case OPCODE_ADD:
509      return TGSI_OPCODE_ADD;
510   case OPCODE_BGNLOOP:
511      return TGSI_OPCODE_BGNLOOP;
512   case OPCODE_BGNSUB:
513      return TGSI_OPCODE_BGNSUB;
514   case OPCODE_BRA:
515      return TGSI_OPCODE_BRA;
516   case OPCODE_BRK:
517      return TGSI_OPCODE_BRK;
518   case OPCODE_CAL:
519      return TGSI_OPCODE_CAL;
520   case OPCODE_CMP:
521      return TGSI_OPCODE_CMP;
522   case OPCODE_CONT:
523      return TGSI_OPCODE_CONT;
524   case OPCODE_COS:
525      return TGSI_OPCODE_COS;
526   case OPCODE_DDX:
527      return TGSI_OPCODE_DDX;
528   case OPCODE_DDY:
529      return TGSI_OPCODE_DDY;
530   case OPCODE_DP2:
531      return TGSI_OPCODE_DP2;
532   case OPCODE_DP2A:
533      return TGSI_OPCODE_DP2A;
534   case OPCODE_DP3:
535      return TGSI_OPCODE_DP3;
536   case OPCODE_DP4:
537      return TGSI_OPCODE_DP4;
538   case OPCODE_DPH:
539      return TGSI_OPCODE_DPH;
540   case OPCODE_DST:
541      return TGSI_OPCODE_DST;
542   case OPCODE_ELSE:
543      return TGSI_OPCODE_ELSE;
544   case OPCODE_EMIT_VERTEX:
545      return TGSI_OPCODE_EMIT;
546   case OPCODE_END_PRIMITIVE:
547      return TGSI_OPCODE_ENDPRIM;
548   case OPCODE_ENDIF:
549      return TGSI_OPCODE_ENDIF;
550   case OPCODE_ENDLOOP:
551      return TGSI_OPCODE_ENDLOOP;
552   case OPCODE_ENDSUB:
553      return TGSI_OPCODE_ENDSUB;
554   case OPCODE_EX2:
555      return TGSI_OPCODE_EX2;
556   case OPCODE_EXP:
557      return TGSI_OPCODE_EXP;
558   case OPCODE_FLR:
559      return TGSI_OPCODE_FLR;
560   case OPCODE_FRC:
561      return TGSI_OPCODE_FRC;
562   case OPCODE_IF:
563      return TGSI_OPCODE_IF;
564   case OPCODE_TRUNC:
565      return TGSI_OPCODE_TRUNC;
566   case OPCODE_KIL:
567      return TGSI_OPCODE_KIL;
568   case OPCODE_KIL_NV:
569      return TGSI_OPCODE_KILP;
570   case OPCODE_LG2:
571      return TGSI_OPCODE_LG2;
572   case OPCODE_LOG:
573      return TGSI_OPCODE_LOG;
574   case OPCODE_LIT:
575      return TGSI_OPCODE_LIT;
576   case OPCODE_LRP:
577      return TGSI_OPCODE_LRP;
578   case OPCODE_MAD:
579      return TGSI_OPCODE_MAD;
580   case OPCODE_MAX:
581      return TGSI_OPCODE_MAX;
582   case OPCODE_MIN:
583      return TGSI_OPCODE_MIN;
584   case OPCODE_MOV:
585      return TGSI_OPCODE_MOV;
586   case OPCODE_MUL:
587      return TGSI_OPCODE_MUL;
588   case OPCODE_NOP:
589      return TGSI_OPCODE_NOP;
590   case OPCODE_NRM3:
591      return TGSI_OPCODE_NRM;
592   case OPCODE_NRM4:
593      return TGSI_OPCODE_NRM4;
594   case OPCODE_POW:
595      return TGSI_OPCODE_POW;
596   case OPCODE_RCP:
597      return TGSI_OPCODE_RCP;
598   case OPCODE_RET:
599      return TGSI_OPCODE_RET;
600   case OPCODE_RSQ:
601      return TGSI_OPCODE_RSQ;
602   case OPCODE_SCS:
603      return TGSI_OPCODE_SCS;
604   case OPCODE_SEQ:
605      return TGSI_OPCODE_SEQ;
606   case OPCODE_SGE:
607      return TGSI_OPCODE_SGE;
608   case OPCODE_SGT:
609      return TGSI_OPCODE_SGT;
610   case OPCODE_SIN:
611      return TGSI_OPCODE_SIN;
612   case OPCODE_SLE:
613      return TGSI_OPCODE_SLE;
614   case OPCODE_SLT:
615      return TGSI_OPCODE_SLT;
616   case OPCODE_SNE:
617      return TGSI_OPCODE_SNE;
618   case OPCODE_SSG:
619      return TGSI_OPCODE_SSG;
620   case OPCODE_SUB:
621      return TGSI_OPCODE_SUB;
622   case OPCODE_TEX:
623      return TGSI_OPCODE_TEX;
624   case OPCODE_TXB:
625      return TGSI_OPCODE_TXB;
626   case OPCODE_TXD:
627      return TGSI_OPCODE_TXD;
628   case OPCODE_TXL:
629      return TGSI_OPCODE_TXL;
630   case OPCODE_TXP:
631      return TGSI_OPCODE_TXP;
632   case OPCODE_XPD:
633      return TGSI_OPCODE_XPD;
634   case OPCODE_END:
635      return TGSI_OPCODE_END;
636   default:
637      debug_assert( 0 );
638      return TGSI_OPCODE_NOP;
639   }
640}
641
642
643static void
644compile_instruction(
645   struct st_translate *t,
646   const struct prog_instruction *inst )
647{
648   struct ureg_program *ureg = t->ureg;
649   GLuint i;
650   struct ureg_dst dst[1];
651   struct ureg_src src[4];
652   unsigned num_dst;
653   unsigned num_src;
654
655   num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
656   num_src = _mesa_num_inst_src_regs( inst->Opcode );
657
658   if (num_dst)
659      dst[0] = translate_dst( t,
660                              &inst->DstReg,
661                              inst->SaturateMode );
662
663   for (i = 0; i < num_src; i++)
664      src[i] = translate_src( t, &inst->SrcReg[i] );
665
666   switch( inst->Opcode ) {
667   case OPCODE_SWZ:
668      emit_swz( t, dst[0], &inst->SrcReg[0] );
669      return;
670
671   case OPCODE_BGNLOOP:
672   case OPCODE_CAL:
673   case OPCODE_ELSE:
674   case OPCODE_ENDLOOP:
675   case OPCODE_IF:
676      debug_assert(num_dst == 0);
677      ureg_label_insn( ureg,
678                       translate_opcode( inst->Opcode ),
679                       src, num_src,
680                       get_label( t, inst->BranchTarget ));
681      return;
682
683   case OPCODE_TEX:
684   case OPCODE_TXB:
685   case OPCODE_TXD:
686   case OPCODE_TXL:
687   case OPCODE_TXP:
688      src[num_src++] = t->samplers[inst->TexSrcUnit];
689      ureg_tex_insn( ureg,
690                     translate_opcode( inst->Opcode ),
691                     dst, num_dst,
692                     translate_texture_target( inst->TexSrcTarget,
693                                               inst->TexShadow ),
694                     src, num_src );
695      return;
696
697   case OPCODE_SCS:
698      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
699      ureg_insn( ureg,
700                 translate_opcode( inst->Opcode ),
701                 dst, num_dst,
702                 src, num_src );
703      break;
704
705   case OPCODE_XPD:
706      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
707      ureg_insn( ureg,
708                 translate_opcode( inst->Opcode ),
709                 dst, num_dst,
710                 src, num_src );
711      break;
712
713   case OPCODE_NOISE1:
714   case OPCODE_NOISE2:
715   case OPCODE_NOISE3:
716   case OPCODE_NOISE4:
717      /* At some point, a motivated person could add a better
718       * implementation of noise.  Currently not even the nvidia
719       * binary drivers do anything more than this.  In any case, the
720       * place to do this is in the GL state tracker, not the poor
721       * driver.
722       */
723      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
724      break;
725
726   case OPCODE_DDY:
727      emit_ddy( t, dst[0], &inst->SrcReg[0] );
728      break;
729
730   default:
731      ureg_insn( ureg,
732                 translate_opcode( inst->Opcode ),
733                 dst, num_dst,
734                 src, num_src );
735      break;
736   }
737}
738
739
740/**
741 * Emit the TGSI instructions to adjust the WPOS pixel center convention
742 */
743static void
744emit_adjusted_wpos( struct st_translate *t,
745                    const struct gl_program *program, GLfloat value)
746{
747   struct ureg_program *ureg = t->ureg;
748   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
749   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
750
751   /* Note that we bias X and Y and pass Z and W through unchanged.
752    * The shader might also use gl_FragCoord.w and .z.
753    */
754   ureg_ADD(ureg, wpos_temp, wpos_input,
755            ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
756
757   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
758}
759
760
761/**
762 * Emit the TGSI instructions for inverting the WPOS y coordinate.
763 */
764static void
765emit_inverted_wpos( struct st_translate *t,
766                    const struct gl_program *program )
767{
768   struct ureg_program *ureg = t->ureg;
769
770   /* Fragment program uses fragment position input.
771    * Need to replace instances of INPUT[WPOS] with temp T
772    * where T = INPUT[WPOS] by y is inverted.
773    */
774   static const gl_state_index winSizeState[STATE_LENGTH]
775      = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
776
777   /* XXX: note we are modifying the incoming shader here!  Need to
778    * do this before emitting the constant decls below, or this
779    * will be missed:
780    */
781   unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
782                                                       winSizeState);
783
784   struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
785   struct ureg_dst wpos_temp;
786   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
787
788   /* MOV wpos_temp, input[wpos]
789    */
790   if (wpos_input.File == TGSI_FILE_TEMPORARY)
791      wpos_temp = ureg_dst(wpos_input);
792   else {
793      wpos_temp = ureg_DECL_temporary( ureg );
794      ureg_MOV( ureg, wpos_temp, wpos_input );
795   }
796
797   /* SUB wpos_temp.y, winsize_const, wpos_input
798    */
799   ureg_SUB( ureg,
800             ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
801             winsize,
802             wpos_input);
803
804   /* Use wpos_temp as position input from here on:
805    */
806   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
807}
808
809
810/**
811 * Emit fragment position/ooordinate code.
812 */
813static void
814emit_wpos(struct st_context *st,
815          struct st_translate *t,
816          const struct gl_program *program,
817          struct ureg_program *ureg)
818{
819   const struct gl_fragment_program *fp =
820      (const struct gl_fragment_program *) program;
821   struct pipe_screen *pscreen = st->pipe->screen;
822   boolean invert = FALSE;
823
824   if (fp->OriginUpperLeft) {
825      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
826      }
827      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
828         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
829         invert = TRUE;
830      }
831      else
832         assert(0);
833   }
834   else {
835      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
836         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
837      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
838         invert = TRUE;
839      else
840         assert(0);
841   }
842
843   if (fp->PixelCenterInteger) {
844      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
845         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
846      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
847         emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
848      else
849         assert(0);
850   }
851   else {
852      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
853      }
854      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
855         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
856         emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
857      }
858      else
859         assert(0);
860   }
861
862   /* we invert after adjustment so that we avoid the MOV to temporary,
863    * and reuse the adjustment ADD instead */
864   if (invert)
865      emit_inverted_wpos(t, program);
866}
867
868
869/**
870 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
871 * TGSI uses +1 for front, -1 for back.
872 * This function converts the TGSI value to the GL value.  Simply clamping/
873 * saturating the value to [0,1] does the job.
874 */
875static void
876emit_face_var( struct st_translate *t,
877               const struct gl_program *program )
878{
879   struct ureg_program *ureg = t->ureg;
880   struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
881   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
882
883   /* MOV_SAT face_temp, input[face]
884    */
885   face_temp = ureg_saturate( face_temp );
886   ureg_MOV( ureg, face_temp, face_input );
887
888   /* Use face_temp as face input from here on:
889    */
890   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
891}
892
893
894static void
895emit_edgeflags( struct st_translate *t,
896                 const struct gl_program *program )
897{
898   struct ureg_program *ureg = t->ureg;
899   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
900   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
901
902   ureg_MOV( ureg, edge_dst, edge_src );
903}
904
905
906/**
907 * Translate Mesa program to TGSI format.
908 * \param program  the program to translate
909 * \param numInputs  number of input registers used
910 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
911 *                      input indexes
912 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
913 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
914 *                            each input
915 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
916 * \param numOutputs  number of output registers used
917 * \param outputMapping  maps Mesa fragment program outputs to TGSI
918 *                       generic outputs
919 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
920 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
921 *                             each output
922 *
923 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
924 */
925enum pipe_error
926st_translate_mesa_program(
927   GLcontext *ctx,
928   uint procType,
929   struct ureg_program *ureg,
930   const struct gl_program *program,
931   GLuint numInputs,
932   const GLuint inputMapping[],
933   const ubyte inputSemanticName[],
934   const ubyte inputSemanticIndex[],
935   const GLuint interpMode[],
936   GLuint numOutputs,
937   const GLuint outputMapping[],
938   const ubyte outputSemanticName[],
939   const ubyte outputSemanticIndex[],
940   boolean passthrough_edgeflags )
941{
942   struct st_translate translate, *t;
943   unsigned i;
944   enum pipe_error ret = PIPE_OK;
945
946   assert(numInputs <= Elements(t->inputs));
947   assert(numOutputs <= Elements(t->outputs));
948
949   t = &translate;
950   memset(t, 0, sizeof *t);
951
952   t->procType = procType;
953   t->inputMapping = inputMapping;
954   t->outputMapping = outputMapping;
955   t->ureg = ureg;
956   t->pointSizeOutIndex = -1;
957   t->prevInstWrotePointSize = GL_FALSE;
958
959   /*_mesa_print_program(program);*/
960
961   /*
962    * Declare input attributes.
963    */
964   if (procType == TGSI_PROCESSOR_FRAGMENT) {
965      for (i = 0; i < numInputs; i++) {
966         if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
967            t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
968                                                  inputSemanticName[i],
969                                                  inputSemanticIndex[i],
970                                                  interpMode[i],
971                                                  TGSI_CYLINDRICAL_WRAP_X);
972         }
973         else {
974            t->inputs[i] = ureg_DECL_fs_input(ureg,
975                                              inputSemanticName[i],
976                                              inputSemanticIndex[i],
977                                              interpMode[i]);
978         }
979      }
980
981      if (program->InputsRead & FRAG_BIT_WPOS) {
982         /* Must do this after setting up t->inputs, and before
983          * emitting constant references, below:
984          */
985         emit_wpos(st_context(ctx), t, program, ureg);
986      }
987
988      if (program->InputsRead & FRAG_BIT_FACE) {
989         emit_face_var( t, program );
990      }
991
992      /*
993       * Declare output attributes.
994       */
995      for (i = 0; i < numOutputs; i++) {
996         switch (outputSemanticName[i]) {
997         case TGSI_SEMANTIC_POSITION:
998            t->outputs[i] = ureg_DECL_output( ureg,
999                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
1000                                              outputSemanticIndex[i] );
1001
1002            t->outputs[i] = ureg_writemask( t->outputs[i],
1003                                            TGSI_WRITEMASK_Z );
1004            break;
1005         case TGSI_SEMANTIC_COLOR:
1006            t->outputs[i] = ureg_DECL_output( ureg,
1007                                              TGSI_SEMANTIC_COLOR,
1008                                              outputSemanticIndex[i] );
1009            break;
1010         default:
1011            debug_assert(0);
1012            return 0;
1013         }
1014      }
1015   }
1016   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
1017      for (i = 0; i < numInputs; i++) {
1018         t->inputs[i] = ureg_DECL_gs_input(ureg,
1019                                           i,
1020                                           inputSemanticName[i],
1021                                           inputSemanticIndex[i]);
1022      }
1023
1024      for (i = 0; i < numOutputs; i++) {
1025         t->outputs[i] = ureg_DECL_output( ureg,
1026                                           outputSemanticName[i],
1027                                           outputSemanticIndex[i] );
1028      }
1029   }
1030   else {
1031      assert(procType == TGSI_PROCESSOR_VERTEX);
1032
1033      for (i = 0; i < numInputs; i++) {
1034         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
1035      }
1036
1037      for (i = 0; i < numOutputs; i++) {
1038         t->outputs[i] = ureg_DECL_output( ureg,
1039                                           outputSemanticName[i],
1040                                           outputSemanticIndex[i] );
1041         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
1042            /* Writing to the point size result register requires special
1043             * handling to implement clamping.
1044             */
1045            static const gl_state_index pointSizeClampState[STATE_LENGTH]
1046               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
1047               /* XXX: note we are modifying the incoming shader here!  Need to
1048               * do this before emitting the constant decls below, or this
1049               * will be missed:
1050               */
1051            unsigned pointSizeClampConst =
1052               _mesa_add_state_reference(program->Parameters,
1053                                         pointSizeClampState);
1054            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
1055            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
1056            t->pointSizeResult = t->outputs[i];
1057            t->pointSizeOutIndex = i;
1058            t->outputs[i] = psizregtemp;
1059         }
1060      }
1061      if (passthrough_edgeflags)
1062         emit_edgeflags( t, program );
1063   }
1064
1065   /* Declare address register.
1066    */
1067   if (program->NumAddressRegs > 0) {
1068      debug_assert( program->NumAddressRegs == 1 );
1069      t->address[0] = ureg_DECL_address( ureg );
1070   }
1071
1072   if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
1073      /* If temps are accessed with indirect addressing, declare temporaries
1074       * in sequential order.  Else, we declare them on demand elsewhere.
1075       */
1076      for (i = 0; i < program->NumTemporaries; i++) {
1077         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
1078         t->temps[i] = ureg_DECL_temporary( t->ureg );
1079      }
1080   }
1081
1082   /* Emit constants and immediates.  Mesa uses a single index space
1083    * for these, so we put all the translated regs in t->constants.
1084    */
1085   if (program->Parameters) {
1086      t->constants = CALLOC( program->Parameters->NumParameters,
1087                             sizeof t->constants[0] );
1088      if (t->constants == NULL) {
1089         ret = PIPE_ERROR_OUT_OF_MEMORY;
1090         goto out;
1091      }
1092
1093      for (i = 0; i < program->Parameters->NumParameters; i++) {
1094         switch (program->Parameters->Parameters[i].Type) {
1095         case PROGRAM_ENV_PARAM:
1096         case PROGRAM_LOCAL_PARAM:
1097         case PROGRAM_STATE_VAR:
1098         case PROGRAM_NAMED_PARAM:
1099         case PROGRAM_UNIFORM:
1100            t->constants[i] = ureg_DECL_constant( ureg, i );
1101            break;
1102
1103            /* Emit immediates only when there's no indirect addressing of
1104             * the const buffer.
1105             * FIXME: Be smarter and recognize param arrays:
1106             * indirect addressing is only valid within the referenced
1107             * array.
1108             */
1109         case PROGRAM_CONSTANT:
1110            if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
1111               t->constants[i] = ureg_DECL_constant( ureg, i );
1112            else
1113               t->constants[i] =
1114                  ureg_DECL_immediate( ureg,
1115                                       program->Parameters->ParameterValues[i],
1116                                       4 );
1117            break;
1118         default:
1119            break;
1120         }
1121      }
1122   }
1123
1124   /* texture samplers */
1125   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1126      if (program->SamplersUsed & (1 << i)) {
1127         t->samplers[i] = ureg_DECL_sampler( ureg, i );
1128      }
1129   }
1130
1131   /* Emit each instruction in turn:
1132    */
1133   for (i = 0; i < program->NumInstructions; i++) {
1134      set_insn_start( t, ureg_get_instruction_number( ureg ));
1135      compile_instruction( t, &program->Instructions[i] );
1136
1137      if (t->prevInstWrotePointSize && program->Id) {
1138         /* The previous instruction wrote to the (fake) vertex point size
1139          * result register.  Now we need to clamp that value to the min/max
1140          * point size range, putting the result into the real point size
1141          * register.
1142          * Note that we can't do this easily at the end of program due to
1143          * possible early return.
1144          */
1145         set_insn_start( t, ureg_get_instruction_number( ureg ));
1146         ureg_MAX( t->ureg,
1147                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
1148                   ureg_src(t->outputs[t->pointSizeOutIndex]),
1149                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1150         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
1151                   ureg_src(t->outputs[t->pointSizeOutIndex]),
1152                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1153      }
1154      t->prevInstWrotePointSize = GL_FALSE;
1155   }
1156
1157   /* Fix up all emitted labels:
1158    */
1159   for (i = 0; i < t->labels_count; i++) {
1160      ureg_fixup_label( ureg,
1161                        t->labels[i].token,
1162                        t->insn[t->labels[i].branch_target] );
1163   }
1164
1165out:
1166   FREE(t->insn);
1167   FREE(t->labels);
1168   FREE(t->constants);
1169
1170   if (t->error) {
1171      debug_printf("%s: translate error flag set\n", __FUNCTION__);
1172   }
1173
1174   return ret;
1175}
1176
1177
/**
 * Release the memory referenced by \p tokens through the FREE() wrapper.
 *
 * Tokens must not be released with plain free(); otherwise the built-in
 * gallium malloc debugging will get confused.
 *
 * \param tokens  pointer to the tokens to release
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   /* Cast away the const qualifier before handing the pointer to FREE(). */
   void *mem = (void *) tokens;

   FREE(mem);
}
1187