st_mesa_to_tgsi.c revision 3b2bdde1b2ee93f77c01f5a94ebb7778192c15f8
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34#include "pipe/p_compiler.h"
35#include "pipe/p_shader_tokens.h"
36#include "pipe/p_state.h"
37#include "pipe/p_context.h"
38#include "tgsi/tgsi_ureg.h"
39#include "st_mesa_to_tgsi.h"
40#include "st_context.h"
41#include "shader/prog_instruction.h"
42#include "shader/prog_parameter.h"
43#include "util/u_debug.h"
44#include "util/u_math.h"
45#include "util/u_memory.h"
46
/**
 * One pending branch label.
 */
struct label {
   unsigned branch_target;   /**< Mesa instruction index the branch refers to */
   unsigned token;           /**< label slot filled in by ureg_label_insn() and
                              *   later passed to ureg_fixup_label() */
};
51
52
53/**
54 * Intermediate state used during shader translation.
55 */
56struct st_translate {
57   struct ureg_program *ureg;
58
59   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
60   struct ureg_src *constants;
61   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
62   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
63   struct ureg_dst address[1];
64   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
65   struct ureg_dst psizregreal;
66   struct ureg_src pointSizeConst;
67   GLint psizoutindex;
68   GLboolean prevInstWrotePsiz;
69
70   const GLuint *inputMapping;
71   const GLuint *outputMapping;
72
73   /* For every instruction that contains a label (eg CALL), keep
74    * details so that we can go back afterwards and emit the correct
75    * tgsi instruction number for each label.
76    */
77   struct label *labels;
78   unsigned labels_size;
79   unsigned labels_count;
80
81   /* Keep a record of the tgsi instruction number that each mesa
82    * instruction starts at, will be used to fix up labels after
83    * translation.
84    */
85   unsigned *insn;
86   unsigned insn_size;
87   unsigned insn_count;
88
89   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
90
91   boolean error;
92};
93
94
95static unsigned *get_label( struct st_translate *t,
96                            unsigned branch_target )
97{
98   unsigned i;
99
100   if (t->labels_count + 1 >= t->labels_size) {
101      unsigned old_size = t->labels_size;
102      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
103      t->labels = REALLOC( t->labels,
104                           old_size * sizeof t->labels[0],
105                           t->labels_size * sizeof t->labels[0] );
106      if (t->labels == NULL) {
107         static unsigned dummy;
108         t->error = TRUE;
109         return &dummy;
110      }
111   }
112
113   i = t->labels_count++;
114   t->labels[i].branch_target = branch_target;
115   return &t->labels[i].token;
116}
117
118
119static void set_insn_start( struct st_translate *t,
120                            unsigned start )
121{
122   if (t->insn_count + 1 >= t->insn_size) {
123      unsigned old_size = t->insn_size;
124      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
125      t->insn = REALLOC( t->insn,
126                         old_size * sizeof t->insn[0],
127                         t->insn_size * sizeof t->insn[0] );
128      if (t->insn == NULL) {
129         t->error = TRUE;
130         return;
131      }
132   }
133
134   t->insn[t->insn_count++] = start;
135}
136
137
138/*
139 * Map mesa register file to TGSI register file.
140 */
141static struct ureg_dst
142dst_register( struct st_translate *t,
143              gl_register_file file,
144              GLuint index )
145{
146   switch( file ) {
147   case PROGRAM_UNDEFINED:
148      return ureg_dst_undef();
149
150   case PROGRAM_TEMPORARY:
151      if (ureg_dst_is_undef(t->temps[index]))
152         t->temps[index] = ureg_DECL_temporary( t->ureg );
153
154      return t->temps[index];
155
156   case PROGRAM_OUTPUT:
157      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
158         t->prevInstWrotePsiz = GL_TRUE;
159
160      if (t->procType == TGSI_PROCESSOR_VERTEX)
161         assert(index < VERT_RESULT_MAX);
162      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
163         assert(index < FRAG_RESULT_MAX);
164      else
165         assert(0 && "geom shaders not handled in dst_register() yet");
166
167      assert(t->outputMapping[index] < Elements(t->outputs));
168
169      return t->outputs[t->outputMapping[index]];
170
171   case PROGRAM_ADDRESS:
172      return t->address[index];
173
174   default:
175      debug_assert( 0 );
176      return ureg_dst_undef();
177   }
178}
179
180
181static struct ureg_src
182src_register( struct st_translate *t,
183              gl_register_file file,
184              GLint index )
185{
186   switch( file ) {
187   case PROGRAM_UNDEFINED:
188      return ureg_src_undef();
189
190   case PROGRAM_TEMPORARY:
191      ASSERT(index >= 0);
192      if (ureg_dst_is_undef(t->temps[index]))
193         t->temps[index] = ureg_DECL_temporary( t->ureg );
194      assert(index < Elements(t->temps));
195      return ureg_src(t->temps[index]);
196
197   case PROGRAM_NAMED_PARAM:
198   case PROGRAM_ENV_PARAM:
199   case PROGRAM_LOCAL_PARAM:
200   case PROGRAM_UNIFORM:
201      ASSERT(index >= 0);
202      return t->constants[index];
203   case PROGRAM_STATE_VAR:
204   case PROGRAM_CONSTANT:       /* ie, immediate */
205      if (index < 0)
206         return ureg_DECL_constant( t->ureg, 0 );
207      else
208         return t->constants[index];
209
210   case PROGRAM_INPUT:
211      assert(t->inputMapping[index] < Elements(t->inputs));
212      return t->inputs[t->inputMapping[index]];
213
214   case PROGRAM_OUTPUT:
215      assert(t->outputMapping[index] < Elements(t->outputs));
216      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
217
218   case PROGRAM_ADDRESS:
219      return ureg_src(t->address[index]);
220
221   default:
222      debug_assert( 0 );
223      return ureg_src_undef();
224   }
225}
226
227
228/**
229 * Map mesa texture target to TGSI texture target.
230 */
231static unsigned
232translate_texture_target( GLuint textarget,
233                          GLboolean shadow )
234{
235   if (shadow) {
236      switch( textarget ) {
237      case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_SHADOW1D;
238      case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_SHADOW2D;
239      case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
240      default: break;
241      }
242   }
243
244   switch( textarget ) {
245   case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
246   case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
247   case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
248   case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
249   case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
250   default:
251      debug_assert( 0 );
252      return TGSI_TEXTURE_1D;
253   }
254}
255
256
257static struct ureg_dst
258translate_dst( struct st_translate *t,
259               const struct prog_dst_register *DstReg,
260               boolean saturate )
261{
262   struct ureg_dst dst = dst_register( t,
263                                       DstReg->File,
264                                       DstReg->Index );
265
266   dst = ureg_writemask( dst,
267                         DstReg->WriteMask );
268
269   if (saturate)
270      dst = ureg_saturate( dst );
271
272   if (DstReg->RelAddr)
273      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
274
275   return dst;
276}
277
278
279static struct ureg_src
280translate_src( struct st_translate *t,
281               const struct prog_src_register *SrcReg )
282{
283   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
284
285   src = ureg_swizzle( src,
286                       GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
287                       GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
288                       GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
289                       GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
290
291   if (SrcReg->Negate == NEGATE_XYZW)
292      src = ureg_negate(src);
293
294   if (SrcReg->Abs)
295      src = ureg_abs(src);
296
297   if (SrcReg->RelAddr) {
298      src = ureg_src_indirect( src, ureg_src(t->address[0]));
299      /* If SrcReg->Index was negative, it was set to zero in
300       * src_register().  Reassign it now.
301       */
302      src.Index = SrcReg->Index;
303   }
304
305   return src;
306}
307
308
309static struct ureg_src swizzle_4v( struct ureg_src src,
310                                   const unsigned *swz )
311{
312   return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
313}
314
315
316/**
317 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
318 *
319 *   SWZ dst, src.x-y10
320 *
321 * becomes:
322 *
323 *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
324 */
325static void emit_swz( struct st_translate *t,
326                      struct ureg_dst dst,
327                      const struct prog_src_register *SrcReg )
328{
329   struct ureg_program *ureg = t->ureg;
330   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
331
332   unsigned negate_mask =  SrcReg->Negate;
333
334   unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
335                        (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
336                        (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
337                        (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
338
339   unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
340                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
341                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
342                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
343
344   unsigned negative_one_mask = one_mask & negate_mask;
345   unsigned positive_one_mask = one_mask & ~negate_mask;
346
347   struct ureg_src imm;
348   unsigned i;
349   unsigned mul_swizzle[4] = {0,0,0,0};
350   unsigned add_swizzle[4] = {0,0,0,0};
351   unsigned src_swizzle[4] = {0,0,0,0};
352   boolean need_add = FALSE;
353   boolean need_mul = FALSE;
354
355   if (dst.WriteMask == 0)
356      return;
357
358   /* Is this just a MOV?
359    */
360   if (zero_mask == 0 &&
361       one_mask == 0 &&
362       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
363   {
364      ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
365      return;
366   }
367
368#define IMM_ZERO    0
369#define IMM_ONE     1
370#define IMM_NEG_ONE 2
371
372   imm = ureg_imm3f( ureg, 0, 1, -1 );
373
374   for (i = 0; i < 4; i++) {
375      unsigned bit = 1 << i;
376
377      if (dst.WriteMask & bit) {
378         if (positive_one_mask & bit) {
379            mul_swizzle[i] = IMM_ZERO;
380            add_swizzle[i] = IMM_ONE;
381            need_add = TRUE;
382         }
383         else if (negative_one_mask & bit) {
384            mul_swizzle[i] = IMM_ZERO;
385            add_swizzle[i] = IMM_NEG_ONE;
386            need_add = TRUE;
387         }
388         else if (zero_mask & bit) {
389            mul_swizzle[i] = IMM_ZERO;
390            add_swizzle[i] = IMM_ZERO;
391            need_add = TRUE;
392         }
393         else {
394            add_swizzle[i] = IMM_ZERO;
395            src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
396            need_mul = TRUE;
397            if (negate_mask & bit) {
398               mul_swizzle[i] = IMM_NEG_ONE;
399            }
400            else {
401               mul_swizzle[i] = IMM_ONE;
402            }
403         }
404      }
405   }
406
407   if (need_mul && need_add) {
408      ureg_MAD( ureg,
409                dst,
410                swizzle_4v( src, src_swizzle ),
411                swizzle_4v( imm, mul_swizzle ),
412                swizzle_4v( imm, add_swizzle ) );
413   }
414   else if (need_mul) {
415      ureg_MUL( ureg,
416                dst,
417                swizzle_4v( src, src_swizzle ),
418                swizzle_4v( imm, mul_swizzle ) );
419   }
420   else if (need_add) {
421      ureg_MOV( ureg,
422                dst,
423                swizzle_4v( imm, add_swizzle ) );
424   }
425   else {
426      debug_assert(0);
427   }
428
429#undef IMM_ZERO
430#undef IMM_ONE
431#undef IMM_NEG_ONE
432}
433
434
435/**
436 * Negate the value of DDY to match GL semantics where (0,0) is the
437 * lower-left corner of the window.
438 * Note that the GL_ARB_fragment_coord_conventions extension will
439 * effect this someday.
440 */
441static void emit_ddy( struct st_translate *t,
442                      struct ureg_dst dst,
443                      const struct prog_src_register *SrcReg )
444{
445   struct ureg_program *ureg = t->ureg;
446   struct ureg_src src = translate_src( t, SrcReg );
447   src = ureg_negate( src );
448   ureg_DDY( ureg, dst, src );
449}
450
451
452
453static unsigned
454translate_opcode( unsigned op )
455{
456   switch( op ) {
457   case OPCODE_ARL:
458      return TGSI_OPCODE_ARL;
459   case OPCODE_ABS:
460      return TGSI_OPCODE_ABS;
461   case OPCODE_ADD:
462      return TGSI_OPCODE_ADD;
463   case OPCODE_BGNLOOP:
464      return TGSI_OPCODE_BGNLOOP;
465   case OPCODE_BGNSUB:
466      return TGSI_OPCODE_BGNSUB;
467   case OPCODE_BRA:
468      return TGSI_OPCODE_BRA;
469   case OPCODE_BRK:
470      return TGSI_OPCODE_BRK;
471   case OPCODE_CAL:
472      return TGSI_OPCODE_CAL;
473   case OPCODE_CMP:
474      return TGSI_OPCODE_CMP;
475   case OPCODE_CONT:
476      return TGSI_OPCODE_CONT;
477   case OPCODE_COS:
478      return TGSI_OPCODE_COS;
479   case OPCODE_DDX:
480      return TGSI_OPCODE_DDX;
481   case OPCODE_DDY:
482      return TGSI_OPCODE_DDY;
483   case OPCODE_DP2:
484      return TGSI_OPCODE_DP2;
485   case OPCODE_DP2A:
486      return TGSI_OPCODE_DP2A;
487   case OPCODE_DP3:
488      return TGSI_OPCODE_DP3;
489   case OPCODE_DP4:
490      return TGSI_OPCODE_DP4;
491   case OPCODE_DPH:
492      return TGSI_OPCODE_DPH;
493   case OPCODE_DST:
494      return TGSI_OPCODE_DST;
495   case OPCODE_ELSE:
496      return TGSI_OPCODE_ELSE;
497   case OPCODE_ENDIF:
498      return TGSI_OPCODE_ENDIF;
499   case OPCODE_ENDLOOP:
500      return TGSI_OPCODE_ENDLOOP;
501   case OPCODE_ENDSUB:
502      return TGSI_OPCODE_ENDSUB;
503   case OPCODE_EX2:
504      return TGSI_OPCODE_EX2;
505   case OPCODE_EXP:
506      return TGSI_OPCODE_EXP;
507   case OPCODE_FLR:
508      return TGSI_OPCODE_FLR;
509   case OPCODE_FRC:
510      return TGSI_OPCODE_FRC;
511   case OPCODE_IF:
512      return TGSI_OPCODE_IF;
513   case OPCODE_TRUNC:
514      return TGSI_OPCODE_TRUNC;
515   case OPCODE_KIL:
516      return TGSI_OPCODE_KIL;
517   case OPCODE_KIL_NV:
518      return TGSI_OPCODE_KILP;
519   case OPCODE_LG2:
520      return TGSI_OPCODE_LG2;
521   case OPCODE_LOG:
522      return TGSI_OPCODE_LOG;
523   case OPCODE_LIT:
524      return TGSI_OPCODE_LIT;
525   case OPCODE_LRP:
526      return TGSI_OPCODE_LRP;
527   case OPCODE_MAD:
528      return TGSI_OPCODE_MAD;
529   case OPCODE_MAX:
530      return TGSI_OPCODE_MAX;
531   case OPCODE_MIN:
532      return TGSI_OPCODE_MIN;
533   case OPCODE_MOV:
534      return TGSI_OPCODE_MOV;
535   case OPCODE_MUL:
536      return TGSI_OPCODE_MUL;
537   case OPCODE_NOP:
538      return TGSI_OPCODE_NOP;
539   case OPCODE_NRM3:
540      return TGSI_OPCODE_NRM;
541   case OPCODE_NRM4:
542      return TGSI_OPCODE_NRM4;
543   case OPCODE_POW:
544      return TGSI_OPCODE_POW;
545   case OPCODE_RCP:
546      return TGSI_OPCODE_RCP;
547   case OPCODE_RET:
548      return TGSI_OPCODE_RET;
549   case OPCODE_RSQ:
550      return TGSI_OPCODE_RSQ;
551   case OPCODE_SCS:
552      return TGSI_OPCODE_SCS;
553   case OPCODE_SEQ:
554      return TGSI_OPCODE_SEQ;
555   case OPCODE_SGE:
556      return TGSI_OPCODE_SGE;
557   case OPCODE_SGT:
558      return TGSI_OPCODE_SGT;
559   case OPCODE_SIN:
560      return TGSI_OPCODE_SIN;
561   case OPCODE_SLE:
562      return TGSI_OPCODE_SLE;
563   case OPCODE_SLT:
564      return TGSI_OPCODE_SLT;
565   case OPCODE_SNE:
566      return TGSI_OPCODE_SNE;
567   case OPCODE_SSG:
568      return TGSI_OPCODE_SSG;
569   case OPCODE_SUB:
570      return TGSI_OPCODE_SUB;
571   case OPCODE_TEX:
572      return TGSI_OPCODE_TEX;
573   case OPCODE_TXB:
574      return TGSI_OPCODE_TXB;
575   case OPCODE_TXD:
576      return TGSI_OPCODE_TXD;
577   case OPCODE_TXL:
578      return TGSI_OPCODE_TXL;
579   case OPCODE_TXP:
580      return TGSI_OPCODE_TXP;
581   case OPCODE_XPD:
582      return TGSI_OPCODE_XPD;
583   case OPCODE_END:
584      return TGSI_OPCODE_END;
585   default:
586      debug_assert( 0 );
587      return TGSI_OPCODE_NOP;
588   }
589}
590
591
592static void
593compile_instruction(
594   struct st_translate *t,
595   const struct prog_instruction *inst )
596{
597   struct ureg_program *ureg = t->ureg;
598   GLuint i;
599   struct ureg_dst dst[1];
600   struct ureg_src src[4];
601   unsigned num_dst;
602   unsigned num_src;
603
604   num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
605   num_src = _mesa_num_inst_src_regs( inst->Opcode );
606
607   if (num_dst)
608      dst[0] = translate_dst( t,
609                              &inst->DstReg,
610                              inst->SaturateMode );
611
612   for (i = 0; i < num_src; i++)
613      src[i] = translate_src( t, &inst->SrcReg[i] );
614
615   switch( inst->Opcode ) {
616   case OPCODE_SWZ:
617      emit_swz( t, dst[0], &inst->SrcReg[0] );
618      return;
619
620   case OPCODE_BGNLOOP:
621   case OPCODE_CAL:
622   case OPCODE_ELSE:
623   case OPCODE_ENDLOOP:
624   case OPCODE_IF:
625      debug_assert(num_dst == 0);
626      ureg_label_insn( ureg,
627                       translate_opcode( inst->Opcode ),
628                       src, num_src,
629                       get_label( t, inst->BranchTarget ));
630      return;
631
632   case OPCODE_TEX:
633   case OPCODE_TXB:
634   case OPCODE_TXD:
635   case OPCODE_TXL:
636   case OPCODE_TXP:
637      src[num_src++] = t->samplers[inst->TexSrcUnit];
638      ureg_tex_insn( ureg,
639                     translate_opcode( inst->Opcode ),
640                     dst, num_dst,
641                     translate_texture_target( inst->TexSrcTarget,
642                                               inst->TexShadow ),
643                     src, num_src );
644      return;
645
646   case OPCODE_SCS:
647      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
648      ureg_insn( ureg,
649                 translate_opcode( inst->Opcode ),
650                 dst, num_dst,
651                 src, num_src );
652      break;
653
654   case OPCODE_XPD:
655      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
656      ureg_insn( ureg,
657                 translate_opcode( inst->Opcode ),
658                 dst, num_dst,
659                 src, num_src );
660      break;
661
662   case OPCODE_NOISE1:
663   case OPCODE_NOISE2:
664   case OPCODE_NOISE3:
665   case OPCODE_NOISE4:
666      /* At some point, a motivated person could add a better
667       * implementation of noise.  Currently not even the nvidia
668       * binary drivers do anything more than this.  In any case, the
669       * place to do this is in the GL state tracker, not the poor
670       * driver.
671       */
672      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
673      break;
674
675   case OPCODE_DDY:
676      emit_ddy( t, dst[0], &inst->SrcReg[0] );
677      break;
678
679   default:
680      ureg_insn( ureg,
681                 translate_opcode( inst->Opcode ),
682                 dst, num_dst,
683                 src, num_src );
684      break;
685   }
686}
687
688/**
689 * Emit the TGSI instructions to adjust the WPOS pixel center convention
690 */
691static void
692emit_adjusted_wpos( struct st_translate *t,
693                    const struct gl_program *program, GLfloat value)
694{
695   struct ureg_program *ureg = t->ureg;
696   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
697   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
698
699   ureg_ADD(ureg, ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y),
700		   wpos_input, ureg_imm1f(ureg, value));
701
702   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
703}
704
705/**
706 * Emit the TGSI instructions for inverting the WPOS y coordinate.
707 */
708static void
709emit_inverted_wpos( struct st_translate *t,
710                    const struct gl_program *program )
711{
712   struct ureg_program *ureg = t->ureg;
713
714   /* Fragment program uses fragment position input.
715    * Need to replace instances of INPUT[WPOS] with temp T
716    * where T = INPUT[WPOS] by y is inverted.
717    */
718   static const gl_state_index winSizeState[STATE_LENGTH]
719      = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
720
721   /* XXX: note we are modifying the incoming shader here!  Need to
722    * do this before emitting the constant decls below, or this
723    * will be missed:
724    */
725   unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
726                                                       winSizeState);
727
728   struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
729   struct ureg_dst wpos_temp;
730   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
731
732   /* MOV wpos_temp, input[wpos]
733    */
734   if (wpos_input.File == TGSI_FILE_TEMPORARY)
735      wpos_temp = ureg_dst(wpos_input);
736   else {
737      wpos_temp = ureg_DECL_temporary( ureg );
738      ureg_MOV( ureg, wpos_temp, wpos_input );
739   }
740
741   /* SUB wpos_temp.y, winsize_const, wpos_input
742    */
743   ureg_SUB( ureg,
744             ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
745             winsize,
746             wpos_input);
747
748   /* Use wpos_temp as position input from here on:
749    */
750   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
751}
752
753
754/**
755 * Emit fragment position/ooordinate code.
756 */
757static void
758emit_wpos(struct st_context *st,
759          struct st_translate *t,
760          const struct gl_program *program,
761          struct ureg_program *ureg)
762{
763   const struct gl_fragment_program *fp =
764      (const struct gl_fragment_program *) program;
765   struct pipe_screen *pscreen = st->pipe->screen;
766   boolean invert = FALSE;
767
768   if (fp->OriginUpperLeft) {
769      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
770      }
771      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
772         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
773         invert = TRUE;
774      }
775      else
776         assert(0);
777   }
778   else {
779      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
780         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
781      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
782         invert = TRUE;
783      else
784         assert(0);
785   }
786
787   if (fp->PixelCenterInteger) {
788      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
789         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
790      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
791         emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
792      else
793         assert(0);
794   }
795   else {
796      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
797      }
798      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
799         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
800         emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
801      }
802      else
803         assert(0);
804   }
805
806   /* we invert after adjustment so that we avoid the MOV to temporary,
807    * and reuse the adjustment ADD instead */
808   if (invert)
809      emit_inverted_wpos(t, program);
810}
811
812
813/**
814 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
815 * TGSI uses +1 for front, -1 for back.
816 * This function converts the TGSI value to the GL value.  Simply clamping/
817 * saturating the value to [0,1] does the job.
818 */
819static void
820emit_face_var( struct st_translate *t,
821               const struct gl_program *program )
822{
823   struct ureg_program *ureg = t->ureg;
824   struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
825   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
826
827   /* MOV_SAT face_temp, input[face]
828    */
829   face_temp = ureg_saturate( face_temp );
830   ureg_MOV( ureg, face_temp, face_input );
831
832   /* Use face_temp as face input from here on:
833    */
834   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
835}
836
837
838static void
839emit_edgeflags( struct st_translate *t,
840                 const struct gl_program *program )
841{
842   struct ureg_program *ureg = t->ureg;
843   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
844   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
845
846   ureg_MOV( ureg, edge_dst, edge_src );
847}
848
849
850/**
851 * Translate Mesa program to TGSI format.
852 * \param program  the program to translate
853 * \param numInputs  number of input registers used
854 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
855 *                      input indexes
856 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
857 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
858 *                            each input
859 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
860 * \param numOutputs  number of output registers used
861 * \param outputMapping  maps Mesa fragment program outputs to TGSI
862 *                       generic outputs
863 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
864 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
865 *                             each output
866 *
867 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
868 */
869enum pipe_error
870st_translate_mesa_program(
871   GLcontext *ctx,
872   uint procType,
873   struct ureg_program *ureg,
874   const struct gl_program *program,
875   GLuint numInputs,
876   const GLuint inputMapping[],
877   const ubyte inputSemanticName[],
878   const ubyte inputSemanticIndex[],
879   const GLuint interpMode[],
880   GLuint numOutputs,
881   const GLuint outputMapping[],
882   const ubyte outputSemanticName[],
883   const ubyte outputSemanticIndex[],
884   boolean passthrough_edgeflags )
885{
886   struct st_translate translate, *t;
887   unsigned i;
888   enum pipe_error ret = PIPE_OK;
889
890   t = &translate;
891   memset(t, 0, sizeof *t);
892
893   t->procType = procType;
894   t->inputMapping = inputMapping;
895   t->outputMapping = outputMapping;
896   t->ureg = ureg;
897   t->psizoutindex = -1;
898   t->prevInstWrotePsiz = GL_FALSE;
899
900   /*_mesa_print_program(program);*/
901
902   /*
903    * Declare input attributes.
904    */
905   if (procType == TGSI_PROCESSOR_FRAGMENT) {
906      for (i = 0; i < numInputs; i++) {
907         if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
908            t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
909                                                  inputSemanticName[i],
910                                                  inputSemanticIndex[i],
911                                                  interpMode[i],
912                                                  TGSI_CYLINDRICAL_WRAP_X);
913         }
914         else {
915            t->inputs[i] = ureg_DECL_fs_input(ureg,
916                                              inputSemanticName[i],
917                                              inputSemanticIndex[i],
918                                              interpMode[i]);
919         }
920      }
921
922      if (program->InputsRead & FRAG_BIT_WPOS) {
923         /* Must do this after setting up t->inputs, and before
924          * emitting constant references, below:
925          */
926         emit_wpos(st_context(ctx), t, program, ureg);
927      }
928
929      if (program->InputsRead & FRAG_BIT_FACE) {
930         emit_face_var( t, program );
931      }
932
933      /*
934       * Declare output attributes.
935       */
936      for (i = 0; i < numOutputs; i++) {
937         switch (outputSemanticName[i]) {
938         case TGSI_SEMANTIC_POSITION:
939            t->outputs[i] = ureg_DECL_output( ureg,
940                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
941                                              outputSemanticIndex[i] );
942
943            t->outputs[i] = ureg_writemask( t->outputs[i],
944                                            TGSI_WRITEMASK_Z );
945            break;
946         case TGSI_SEMANTIC_COLOR:
947            t->outputs[i] = ureg_DECL_output( ureg,
948                                              TGSI_SEMANTIC_COLOR,
949                                              outputSemanticIndex[i] );
950            break;
951         default:
952            debug_assert(0);
953            return 0;
954         }
955      }
956   }
957   else {
958      for (i = 0; i < numInputs; i++) {
959         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
960      }
961
962      for (i = 0; i < numOutputs; i++) {
963         t->outputs[i] = ureg_DECL_output( ureg,
964                                           outputSemanticName[i],
965                                           outputSemanticIndex[i] );
966         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
967            static const gl_state_index pointSizeClampState[STATE_LENGTH]
968               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
969               /* XXX: note we are modifying the incoming shader here!  Need to
970               * do this before emitting the constant decls below, or this
971               * will be missed:
972               */
973            unsigned pointSizeClampConst =
974               _mesa_add_state_reference(program->Parameters,
975                                         pointSizeClampState);
976            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
977            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
978            t->psizregreal = t->outputs[i];
979            t->psizoutindex = i;
980            t->outputs[i] = psizregtemp;
981         }
982      }
983      if (passthrough_edgeflags)
984         emit_edgeflags( t, program );
985   }
986
987   /* Declare address register.
988    */
989   if (program->NumAddressRegs > 0) {
990      debug_assert( program->NumAddressRegs == 1 );
991      t->address[0] = ureg_DECL_address( ureg );
992   }
993
994   /* Emit constants and immediates.  Mesa uses a single index space
995    * for these, so we put all the translated regs in t->constants.
996    */
997   if (program->Parameters) {
998      t->constants = CALLOC( program->Parameters->NumParameters,
999                             sizeof t->constants[0] );
1000      if (t->constants == NULL) {
1001         ret = PIPE_ERROR_OUT_OF_MEMORY;
1002         goto out;
1003      }
1004
1005      for (i = 0; i < program->Parameters->NumParameters; i++) {
1006         switch (program->Parameters->Parameters[i].Type) {
1007         case PROGRAM_ENV_PARAM:
1008         case PROGRAM_LOCAL_PARAM:
1009         case PROGRAM_STATE_VAR:
1010         case PROGRAM_NAMED_PARAM:
1011         case PROGRAM_UNIFORM:
1012            t->constants[i] = ureg_DECL_constant( ureg, i );
1013            break;
1014
1015            /* Emit immediates only when there is no address register
1016             * in use.  FIXME: Be smarter and recognize param arrays:
1017             * indirect addressing is only valid within the referenced
1018             * array.
1019             */
1020         case PROGRAM_CONSTANT:
1021            if (program->NumAddressRegs > 0)
1022               t->constants[i] = ureg_DECL_constant( ureg, i );
1023            else
1024               t->constants[i] =
1025                  ureg_DECL_immediate( ureg,
1026                                       program->Parameters->ParameterValues[i],
1027                                       4 );
1028            break;
1029         default:
1030            break;
1031         }
1032      }
1033   }
1034
1035   /* texture samplers */
1036   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1037      if (program->SamplersUsed & (1 << i)) {
1038         t->samplers[i] = ureg_DECL_sampler( ureg, i );
1039      }
1040   }
1041
1042   /* Emit each instruction in turn:
1043    */
1044   for (i = 0; i < program->NumInstructions; i++) {
1045      set_insn_start( t, ureg_get_instruction_number( ureg ));
1046      compile_instruction( t, &program->Instructions[i] );
1047
1048      /* note can't do that easily at the end of prog due to
1049         possible early return */
1050      if (t->prevInstWrotePsiz && program->Id) {
1051         set_insn_start( t, ureg_get_instruction_number( ureg ));
1052         ureg_MAX( t->ureg,
1053                   ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X),
1054                   ureg_src(t->outputs[t->psizoutindex]),
1055                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1056         ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X),
1057                   ureg_src(t->outputs[t->psizoutindex]),
1058                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1059      }
1060      t->prevInstWrotePsiz = GL_FALSE;
1061   }
1062
1063   /* Fix up all emitted labels:
1064    */
1065   for (i = 0; i < t->labels_count; i++) {
1066      ureg_fixup_label( ureg,
1067                        t->labels[i].token,
1068                        t->insn[t->labels[i].branch_target] );
1069   }
1070
1071out:
1072   FREE(t->insn);
1073   FREE(t->labels);
1074   FREE(t->constants);
1075
1076   if (t->error) {
1077      debug_printf("%s: translate error flag set\n", __FUNCTION__);
1078   }
1079
1080   return ret;
1081}
1082
1083
/**
 * Release a token array.
 *
 * Tokens must be released with FREE() rather than plain free(), otherwise
 * the builtin gallium malloc debugging will get confused.
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   void *mem = (void *) tokens;

   FREE(mem);
}
1093