st_mesa_to_tgsi.c revision 489eced800cb0f20a1e1d66b84a048e4df99503b
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34#include "pipe/p_compiler.h"
35#include "pipe/p_shader_tokens.h"
36#include "pipe/p_state.h"
37#include "pipe/p_context.h"
38#include "tgsi/tgsi_ureg.h"
39#include "st_mesa_to_tgsi.h"
40#include "st_context.h"
41#include "shader/prog_instruction.h"
42#include "shader/prog_parameter.h"
43#include "util/u_debug.h"
44#include "util/u_math.h"
45#include "util/u_memory.h"
46
/**
 * Book-keeping for one emitted instruction that carries a label.
 * The branch target is recorded as given by the Mesa instruction and is
 * resolved to a TGSI instruction number once translation has finished.
 */
struct label {
   unsigned branch_target;   /* index into st_translate::insn[] */
   unsigned token;           /* label token handed to ureg_fixup_label() */
};
51
52
53/**
54 * Intermediate state used during shader translation.
55 */
56struct st_translate {
57   struct ureg_program *ureg;
58
59   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
60   struct ureg_src *constants;
61   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
62   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
63   struct ureg_dst address[1];
64   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
65   struct ureg_dst psizregreal;
66   struct ureg_src pointSizeConst;
67   GLint psizoutindex;
68   GLboolean prevInstWrotePsiz;
69
70   const GLuint *inputMapping;
71   const GLuint *outputMapping;
72
73   /* For every instruction that contains a label (eg CALL), keep
74    * details so that we can go back afterwards and emit the correct
75    * tgsi instruction number for each label.
76    */
77   struct label *labels;
78   unsigned labels_size;
79   unsigned labels_count;
80
81   /* Keep a record of the tgsi instruction number that each mesa
82    * instruction starts at, will be used to fix up labels after
83    * translation.
84    */
85   unsigned *insn;
86   unsigned insn_size;
87   unsigned insn_count;
88
89   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
90
91   boolean error;
92};
93
94
95static unsigned *get_label( struct st_translate *t,
96                            unsigned branch_target )
97{
98   unsigned i;
99
100   if (t->labels_count + 1 >= t->labels_size) {
101      unsigned old_size = t->labels_size;
102      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
103      t->labels = REALLOC( t->labels,
104                           old_size * sizeof t->labels[0],
105                           t->labels_size * sizeof t->labels[0] );
106      if (t->labels == NULL) {
107         static unsigned dummy;
108         t->error = TRUE;
109         return &dummy;
110      }
111   }
112
113   i = t->labels_count++;
114   t->labels[i].branch_target = branch_target;
115   return &t->labels[i].token;
116}
117
118
119static void set_insn_start( struct st_translate *t,
120                            unsigned start )
121{
122   if (t->insn_count + 1 >= t->insn_size) {
123      unsigned old_size = t->insn_size;
124      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
125      t->insn = REALLOC( t->insn,
126                         old_size * sizeof t->insn[0],
127                         t->insn_size * sizeof t->insn[0] );
128      if (t->insn == NULL) {
129         t->error = TRUE;
130         return;
131      }
132   }
133
134   t->insn[t->insn_count++] = start;
135}
136
137
138/*
139 * Map mesa register file to TGSI register file.
140 */
141static struct ureg_dst
142dst_register( struct st_translate *t,
143              gl_register_file file,
144              GLuint index )
145{
146   switch( file ) {
147   case PROGRAM_UNDEFINED:
148      return ureg_dst_undef();
149
150   case PROGRAM_TEMPORARY:
151      if (ureg_dst_is_undef(t->temps[index]))
152         t->temps[index] = ureg_DECL_temporary( t->ureg );
153
154      return t->temps[index];
155
156   case PROGRAM_OUTPUT:
157      if (index == t->psizoutindex)
158         t->prevInstWrotePsiz = GL_TRUE;
159      return t->outputs[t->outputMapping[index]];
160
161   case PROGRAM_ADDRESS:
162      return t->address[index];
163
164   default:
165      debug_assert( 0 );
166      return ureg_dst_undef();
167   }
168}
169
170
171static struct ureg_src
172src_register( struct st_translate *t,
173              gl_register_file file,
174              GLint index )
175{
176   switch( file ) {
177   case PROGRAM_UNDEFINED:
178      return ureg_src_undef();
179
180   case PROGRAM_TEMPORARY:
181      ASSERT(index >= 0);
182      if (ureg_dst_is_undef(t->temps[index]))
183         t->temps[index] = ureg_DECL_temporary( t->ureg );
184      return ureg_src(t->temps[index]);
185
186   case PROGRAM_NAMED_PARAM:
187   case PROGRAM_ENV_PARAM:
188   case PROGRAM_LOCAL_PARAM:
189   case PROGRAM_UNIFORM:
190      ASSERT(index >= 0);
191      return t->constants[index];
192   case PROGRAM_STATE_VAR:
193   case PROGRAM_CONSTANT:       /* ie, immediate */
194      if (index < 0)
195         return ureg_DECL_constant( t->ureg, 0 );
196      else
197         return t->constants[index];
198
199   case PROGRAM_INPUT:
200      return t->inputs[t->inputMapping[index]];
201
202   case PROGRAM_OUTPUT:
203      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
204
205   case PROGRAM_ADDRESS:
206      return ureg_src(t->address[index]);
207
208   default:
209      debug_assert( 0 );
210      return ureg_src_undef();
211   }
212}
213
214
215/**
216 * Map mesa texture target to TGSI texture target.
217 */
218static unsigned
219translate_texture_target( GLuint textarget,
220                          GLboolean shadow )
221{
222   if (shadow) {
223      switch( textarget ) {
224      case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_SHADOW1D;
225      case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_SHADOW2D;
226      case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
227      default: break;
228      }
229   }
230
231   switch( textarget ) {
232   case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
233   case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
234   case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
235   case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
236   case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
237   default:
238      debug_assert( 0 );
239      return TGSI_TEXTURE_1D;
240   }
241}
242
243
244static struct ureg_dst
245translate_dst( struct st_translate *t,
246               const struct prog_dst_register *DstReg,
247               boolean saturate )
248{
249   struct ureg_dst dst = dst_register( t,
250                                       DstReg->File,
251                                       DstReg->Index );
252
253   dst = ureg_writemask( dst,
254                         DstReg->WriteMask );
255
256   if (saturate)
257      dst = ureg_saturate( dst );
258
259   if (DstReg->RelAddr)
260      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
261
262   return dst;
263}
264
265
266static struct ureg_src
267translate_src( struct st_translate *t,
268               const struct prog_src_register *SrcReg )
269{
270   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
271
272   src = ureg_swizzle( src,
273                       GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
274                       GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
275                       GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
276                       GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
277
278   if (SrcReg->Negate == NEGATE_XYZW)
279      src = ureg_negate(src);
280
281   if (SrcReg->Abs)
282      src = ureg_abs(src);
283
284   if (SrcReg->RelAddr) {
285      src = ureg_src_indirect( src, ureg_src(t->address[0]));
286      /* If SrcReg->Index was negative, it was set to zero in
287       * src_register().  Reassign it now.
288       */
289      src.Index = SrcReg->Index;
290   }
291
292   return src;
293}
294
295
296static struct ureg_src swizzle_4v( struct ureg_src src,
297                                   const unsigned *swz )
298{
299   return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
300}
301
302
303/**
304 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
305 *
306 *   SWZ dst, src.x-y10
307 *
308 * becomes:
309 *
310 *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
311 */
312static void emit_swz( struct st_translate *t,
313                      struct ureg_dst dst,
314                      const struct prog_src_register *SrcReg )
315{
316   struct ureg_program *ureg = t->ureg;
317   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
318
319   unsigned negate_mask =  SrcReg->Negate;
320
321   unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
322                        (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
323                        (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
324                        (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
325
326   unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
327                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
328                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
329                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
330
331   unsigned negative_one_mask = one_mask & negate_mask;
332   unsigned positive_one_mask = one_mask & ~negate_mask;
333
334   struct ureg_src imm;
335   unsigned i;
336   unsigned mul_swizzle[4] = {0,0,0,0};
337   unsigned add_swizzle[4] = {0,0,0,0};
338   unsigned src_swizzle[4] = {0,0,0,0};
339   boolean need_add = FALSE;
340   boolean need_mul = FALSE;
341
342   if (dst.WriteMask == 0)
343      return;
344
345   /* Is this just a MOV?
346    */
347   if (zero_mask == 0 &&
348       one_mask == 0 &&
349       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
350   {
351      ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
352      return;
353   }
354
355#define IMM_ZERO    0
356#define IMM_ONE     1
357#define IMM_NEG_ONE 2
358
359   imm = ureg_imm3f( ureg, 0, 1, -1 );
360
361   for (i = 0; i < 4; i++) {
362      unsigned bit = 1 << i;
363
364      if (dst.WriteMask & bit) {
365         if (positive_one_mask & bit) {
366            mul_swizzle[i] = IMM_ZERO;
367            add_swizzle[i] = IMM_ONE;
368            need_add = TRUE;
369         }
370         else if (negative_one_mask & bit) {
371            mul_swizzle[i] = IMM_ZERO;
372            add_swizzle[i] = IMM_NEG_ONE;
373            need_add = TRUE;
374         }
375         else if (zero_mask & bit) {
376            mul_swizzle[i] = IMM_ZERO;
377            add_swizzle[i] = IMM_ZERO;
378            need_add = TRUE;
379         }
380         else {
381            add_swizzle[i] = IMM_ZERO;
382            src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
383            need_mul = TRUE;
384            if (negate_mask & bit) {
385               mul_swizzle[i] = IMM_NEG_ONE;
386            }
387            else {
388               mul_swizzle[i] = IMM_ONE;
389            }
390         }
391      }
392   }
393
394   if (need_mul && need_add) {
395      ureg_MAD( ureg,
396                dst,
397                swizzle_4v( src, src_swizzle ),
398                swizzle_4v( imm, mul_swizzle ),
399                swizzle_4v( imm, add_swizzle ) );
400   }
401   else if (need_mul) {
402      ureg_MUL( ureg,
403                dst,
404                swizzle_4v( src, src_swizzle ),
405                swizzle_4v( imm, mul_swizzle ) );
406   }
407   else if (need_add) {
408      ureg_MOV( ureg,
409                dst,
410                swizzle_4v( imm, add_swizzle ) );
411   }
412   else {
413      debug_assert(0);
414   }
415
416#undef IMM_ZERO
417#undef IMM_ONE
418#undef IMM_NEG_ONE
419}
420
421
422/**
423 * Negate the value of DDY to match GL semantics where (0,0) is the
424 * lower-left corner of the window.
425 * Note that the GL_ARB_fragment_coord_conventions extension will
426 * effect this someday.
427 */
428static void emit_ddy( struct st_translate *t,
429                      struct ureg_dst dst,
430                      const struct prog_src_register *SrcReg )
431{
432   struct ureg_program *ureg = t->ureg;
433   struct ureg_src src = translate_src( t, SrcReg );
434   src = ureg_negate( src );
435   ureg_DDY( ureg, dst, src );
436}
437
438
439
440static unsigned
441translate_opcode( unsigned op )
442{
443   switch( op ) {
444   case OPCODE_ARL:
445      return TGSI_OPCODE_ARL;
446   case OPCODE_ABS:
447      return TGSI_OPCODE_ABS;
448   case OPCODE_ADD:
449      return TGSI_OPCODE_ADD;
450   case OPCODE_BGNLOOP:
451      return TGSI_OPCODE_BGNLOOP;
452   case OPCODE_BGNSUB:
453      return TGSI_OPCODE_BGNSUB;
454   case OPCODE_BRA:
455      return TGSI_OPCODE_BRA;
456   case OPCODE_BRK:
457      return TGSI_OPCODE_BRK;
458   case OPCODE_CAL:
459      return TGSI_OPCODE_CAL;
460   case OPCODE_CMP:
461      return TGSI_OPCODE_CMP;
462   case OPCODE_CONT:
463      return TGSI_OPCODE_CONT;
464   case OPCODE_COS:
465      return TGSI_OPCODE_COS;
466   case OPCODE_DDX:
467      return TGSI_OPCODE_DDX;
468   case OPCODE_DDY:
469      return TGSI_OPCODE_DDY;
470   case OPCODE_DP2:
471      return TGSI_OPCODE_DP2;
472   case OPCODE_DP2A:
473      return TGSI_OPCODE_DP2A;
474   case OPCODE_DP3:
475      return TGSI_OPCODE_DP3;
476   case OPCODE_DP4:
477      return TGSI_OPCODE_DP4;
478   case OPCODE_DPH:
479      return TGSI_OPCODE_DPH;
480   case OPCODE_DST:
481      return TGSI_OPCODE_DST;
482   case OPCODE_ELSE:
483      return TGSI_OPCODE_ELSE;
484   case OPCODE_ENDIF:
485      return TGSI_OPCODE_ENDIF;
486   case OPCODE_ENDLOOP:
487      return TGSI_OPCODE_ENDLOOP;
488   case OPCODE_ENDSUB:
489      return TGSI_OPCODE_ENDSUB;
490   case OPCODE_EX2:
491      return TGSI_OPCODE_EX2;
492   case OPCODE_EXP:
493      return TGSI_OPCODE_EXP;
494   case OPCODE_FLR:
495      return TGSI_OPCODE_FLR;
496   case OPCODE_FRC:
497      return TGSI_OPCODE_FRC;
498   case OPCODE_IF:
499      return TGSI_OPCODE_IF;
500   case OPCODE_TRUNC:
501      return TGSI_OPCODE_TRUNC;
502   case OPCODE_KIL:
503      return TGSI_OPCODE_KIL;
504   case OPCODE_KIL_NV:
505      return TGSI_OPCODE_KILP;
506   case OPCODE_LG2:
507      return TGSI_OPCODE_LG2;
508   case OPCODE_LOG:
509      return TGSI_OPCODE_LOG;
510   case OPCODE_LIT:
511      return TGSI_OPCODE_LIT;
512   case OPCODE_LRP:
513      return TGSI_OPCODE_LRP;
514   case OPCODE_MAD:
515      return TGSI_OPCODE_MAD;
516   case OPCODE_MAX:
517      return TGSI_OPCODE_MAX;
518   case OPCODE_MIN:
519      return TGSI_OPCODE_MIN;
520   case OPCODE_MOV:
521      return TGSI_OPCODE_MOV;
522   case OPCODE_MUL:
523      return TGSI_OPCODE_MUL;
524   case OPCODE_NOP:
525      return TGSI_OPCODE_NOP;
526   case OPCODE_NRM3:
527      return TGSI_OPCODE_NRM;
528   case OPCODE_NRM4:
529      return TGSI_OPCODE_NRM4;
530   case OPCODE_POW:
531      return TGSI_OPCODE_POW;
532   case OPCODE_RCP:
533      return TGSI_OPCODE_RCP;
534   case OPCODE_RET:
535      return TGSI_OPCODE_RET;
536   case OPCODE_RSQ:
537      return TGSI_OPCODE_RSQ;
538   case OPCODE_SCS:
539      return TGSI_OPCODE_SCS;
540   case OPCODE_SEQ:
541      return TGSI_OPCODE_SEQ;
542   case OPCODE_SGE:
543      return TGSI_OPCODE_SGE;
544   case OPCODE_SGT:
545      return TGSI_OPCODE_SGT;
546   case OPCODE_SIN:
547      return TGSI_OPCODE_SIN;
548   case OPCODE_SLE:
549      return TGSI_OPCODE_SLE;
550   case OPCODE_SLT:
551      return TGSI_OPCODE_SLT;
552   case OPCODE_SNE:
553      return TGSI_OPCODE_SNE;
554   case OPCODE_SSG:
555      return TGSI_OPCODE_SSG;
556   case OPCODE_SUB:
557      return TGSI_OPCODE_SUB;
558   case OPCODE_TEX:
559      return TGSI_OPCODE_TEX;
560   case OPCODE_TXB:
561      return TGSI_OPCODE_TXB;
562   case OPCODE_TXD:
563      return TGSI_OPCODE_TXD;
564   case OPCODE_TXL:
565      return TGSI_OPCODE_TXL;
566   case OPCODE_TXP:
567      return TGSI_OPCODE_TXP;
568   case OPCODE_XPD:
569      return TGSI_OPCODE_XPD;
570   case OPCODE_END:
571      return TGSI_OPCODE_END;
572   default:
573      debug_assert( 0 );
574      return TGSI_OPCODE_NOP;
575   }
576}
577
578
579static void
580compile_instruction(
581   struct st_translate *t,
582   const struct prog_instruction *inst )
583{
584   struct ureg_program *ureg = t->ureg;
585   GLuint i;
586   struct ureg_dst dst[1];
587   struct ureg_src src[4];
588   unsigned num_dst;
589   unsigned num_src;
590
591   num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
592   num_src = _mesa_num_inst_src_regs( inst->Opcode );
593
594   if (num_dst)
595      dst[0] = translate_dst( t,
596                              &inst->DstReg,
597                              inst->SaturateMode );
598
599   for (i = 0; i < num_src; i++)
600      src[i] = translate_src( t, &inst->SrcReg[i] );
601
602   switch( inst->Opcode ) {
603   case OPCODE_SWZ:
604      emit_swz( t, dst[0], &inst->SrcReg[0] );
605      return;
606
607   case OPCODE_BGNLOOP:
608   case OPCODE_CAL:
609   case OPCODE_ELSE:
610   case OPCODE_ENDLOOP:
611   case OPCODE_IF:
612      debug_assert(num_dst == 0);
613      ureg_label_insn( ureg,
614                       translate_opcode( inst->Opcode ),
615                       src, num_src,
616                       get_label( t, inst->BranchTarget ));
617      return;
618
619   case OPCODE_TEX:
620   case OPCODE_TXB:
621   case OPCODE_TXD:
622   case OPCODE_TXL:
623   case OPCODE_TXP:
624      src[num_src++] = t->samplers[inst->TexSrcUnit];
625      ureg_tex_insn( ureg,
626                     translate_opcode( inst->Opcode ),
627                     dst, num_dst,
628                     translate_texture_target( inst->TexSrcTarget,
629                                               inst->TexShadow ),
630                     src, num_src );
631      return;
632
633   case OPCODE_SCS:
634      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
635      ureg_insn( ureg,
636                 translate_opcode( inst->Opcode ),
637                 dst, num_dst,
638                 src, num_src );
639      break;
640
641   case OPCODE_XPD:
642      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
643      ureg_insn( ureg,
644                 translate_opcode( inst->Opcode ),
645                 dst, num_dst,
646                 src, num_src );
647      break;
648
649   case OPCODE_NOISE1:
650   case OPCODE_NOISE2:
651   case OPCODE_NOISE3:
652   case OPCODE_NOISE4:
653      /* At some point, a motivated person could add a better
654       * implementation of noise.  Currently not even the nvidia
655       * binary drivers do anything more than this.  In any case, the
656       * place to do this is in the GL state tracker, not the poor
657       * driver.
658       */
659      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
660      break;
661
662   case OPCODE_DDY:
663      emit_ddy( t, dst[0], &inst->SrcReg[0] );
664      break;
665
666   default:
667      ureg_insn( ureg,
668                 translate_opcode( inst->Opcode ),
669                 dst, num_dst,
670                 src, num_src );
671      break;
672   }
673}
674
675/**
676 * Emit the TGSI instructions to adjust the WPOS pixel center convention
677 */
678static void
679emit_adjusted_wpos( struct st_translate *t,
680                    const struct gl_program *program, GLfloat value)
681{
682   struct ureg_program *ureg = t->ureg;
683   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
684   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
685
686   ureg_ADD(ureg, ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y),
687		   wpos_input, ureg_imm1f(ureg, value));
688
689   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
690}
691
692/**
693 * Emit the TGSI instructions for inverting the WPOS y coordinate.
694 */
695static void
696emit_inverted_wpos( struct st_translate *t,
697                    const struct gl_program *program )
698{
699   struct ureg_program *ureg = t->ureg;
700
701   /* Fragment program uses fragment position input.
702    * Need to replace instances of INPUT[WPOS] with temp T
703    * where T = INPUT[WPOS] by y is inverted.
704    */
705   static const gl_state_index winSizeState[STATE_LENGTH]
706      = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
707
708   /* XXX: note we are modifying the incoming shader here!  Need to
709    * do this before emitting the constant decls below, or this
710    * will be missed:
711    */
712   unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
713                                                       winSizeState);
714
715   struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
716   struct ureg_dst wpos_temp;
717   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
718
719   /* MOV wpos_temp, input[wpos]
720    */
721   if (wpos_input.File == TGSI_FILE_TEMPORARY)
722      wpos_temp = ureg_dst(wpos_input);
723   else {
724      wpos_temp = ureg_DECL_temporary( ureg );
725      ureg_MOV( ureg, wpos_temp, wpos_input );
726   }
727
728   /* SUB wpos_temp.y, winsize_const, wpos_input
729    */
730   ureg_SUB( ureg,
731             ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
732             winsize,
733             wpos_input);
734
735   /* Use wpos_temp as position input from here on:
736    */
737   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
738}
739
740
741/**
742 * Emit fragment position/ooordinate code.
743 */
744static void
745emit_wpos(struct st_context *st,
746          struct st_translate *t,
747          const struct gl_program *program,
748          struct ureg_program *ureg)
749{
750   const struct gl_fragment_program *fp =
751      (const struct gl_fragment_program *) program;
752   struct pipe_screen *pscreen = st->pipe->screen;
753   boolean invert = FALSE;
754
755   if (fp->OriginUpperLeft) {
756      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
757      }
758      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
759         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
760         invert = TRUE;
761      }
762      else
763         assert(0);
764   }
765   else {
766      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
767         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
768      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
769         invert = TRUE;
770      else
771         assert(0);
772   }
773
774   if (fp->PixelCenterInteger) {
775      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
776         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
777      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
778         emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
779      else
780         assert(0);
781   }
782   else {
783      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
784      }
785      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
786         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
787         emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
788      }
789      else
790         assert(0);
791   }
792
793   /* we invert after adjustment so that we avoid the MOV to temporary,
794    * and reuse the adjustment ADD instead */
795   if (invert)
796      emit_inverted_wpos(t, program);
797}
798
799
800/**
801 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
802 * TGSI uses +1 for front, -1 for back.
803 * This function converts the TGSI value to the GL value.  Simply clamping/
804 * saturating the value to [0,1] does the job.
805 */
806static void
807emit_face_var( struct st_translate *t,
808               const struct gl_program *program )
809{
810   struct ureg_program *ureg = t->ureg;
811   struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
812   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
813
814   /* MOV_SAT face_temp, input[face]
815    */
816   face_temp = ureg_saturate( face_temp );
817   ureg_MOV( ureg, face_temp, face_input );
818
819   /* Use face_temp as face input from here on:
820    */
821   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
822}
823
824
825static void
826emit_edgeflags( struct st_translate *t,
827                 const struct gl_program *program )
828{
829   struct ureg_program *ureg = t->ureg;
830   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
831   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
832
833   ureg_MOV( ureg, edge_dst, edge_src );
834}
835
836
837/**
838 * Translate Mesa program to TGSI format.
839 * \param program  the program to translate
840 * \param numInputs  number of input registers used
841 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
842 *                      input indexes
843 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
844 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
845 *                            each input
846 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
847 * \param numOutputs  number of output registers used
848 * \param outputMapping  maps Mesa fragment program outputs to TGSI
849 *                       generic outputs
850 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
851 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
852 *                             each output
853 *
854 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
855 */
856enum pipe_error
857st_translate_mesa_program(
858   GLcontext *ctx,
859   uint procType,
860   struct ureg_program *ureg,
861   const struct gl_program *program,
862   GLuint numInputs,
863   const GLuint inputMapping[],
864   const ubyte inputSemanticName[],
865   const ubyte inputSemanticIndex[],
866   const GLuint interpMode[],
867   GLuint numOutputs,
868   const GLuint outputMapping[],
869   const ubyte outputSemanticName[],
870   const ubyte outputSemanticIndex[],
871   boolean passthrough_edgeflags )
872{
873   struct st_translate translate, *t;
874   unsigned i;
875   enum pipe_error ret = PIPE_OK;
876
877   t = &translate;
878   memset(t, 0, sizeof *t);
879
880   t->procType = procType;
881   t->inputMapping = inputMapping;
882   t->outputMapping = outputMapping;
883   t->ureg = ureg;
884   t->psizoutindex = -1;
885   t->prevInstWrotePsiz = GL_FALSE;
886
887   /*_mesa_print_program(program);*/
888
889   /*
890    * Declare input attributes.
891    */
892   if (procType == TGSI_PROCESSOR_FRAGMENT) {
893      for (i = 0; i < numInputs; i++) {
894         if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
895            t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
896                                                  inputSemanticName[i],
897                                                  inputSemanticIndex[i],
898                                                  interpMode[i],
899                                                  TGSI_CYLINDRICAL_WRAP_X);
900         }
901         else {
902            t->inputs[i] = ureg_DECL_fs_input(ureg,
903                                              inputSemanticName[i],
904                                              inputSemanticIndex[i],
905                                              interpMode[i]);
906         }
907      }
908
909      if (program->InputsRead & FRAG_BIT_WPOS) {
910         /* Must do this after setting up t->inputs, and before
911          * emitting constant references, below:
912          */
913         emit_wpos(st_context(ctx), t, program, ureg);
914      }
915
916      if (program->InputsRead & FRAG_BIT_FACE) {
917         emit_face_var( t, program );
918      }
919
920      /*
921       * Declare output attributes.
922       */
923      for (i = 0; i < numOutputs; i++) {
924         switch (outputSemanticName[i]) {
925         case TGSI_SEMANTIC_POSITION:
926            t->outputs[i] = ureg_DECL_output( ureg,
927                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
928                                              outputSemanticIndex[i] );
929
930            t->outputs[i] = ureg_writemask( t->outputs[i],
931                                            TGSI_WRITEMASK_Z );
932            break;
933         case TGSI_SEMANTIC_COLOR:
934            t->outputs[i] = ureg_DECL_output( ureg,
935                                              TGSI_SEMANTIC_COLOR,
936                                              outputSemanticIndex[i] );
937            break;
938         default:
939            debug_assert(0);
940            return 0;
941         }
942      }
943   }
944   else {
945      for (i = 0; i < numInputs; i++) {
946         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
947      }
948
949      for (i = 0; i < numOutputs; i++) {
950         t->outputs[i] = ureg_DECL_output( ureg,
951                                           outputSemanticName[i],
952                                           outputSemanticIndex[i] );
953         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
954            static const gl_state_index pointSizeClampState[STATE_LENGTH]
955               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
956               /* XXX: note we are modifying the incoming shader here!  Need to
957               * do this before emitting the constant decls below, or this
958               * will be missed:
959               */
960            unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters,
961                                                                     pointSizeClampState);
962            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
963            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
964            t->psizregreal = t->outputs[i];
965            t->psizoutindex = i;
966            t->outputs[i] = psizregtemp;
967         }
968      }
969      if (passthrough_edgeflags)
970         emit_edgeflags( t, program );
971   }
972
973   /* Declare address register.
974    */
975   if (program->NumAddressRegs > 0) {
976      debug_assert( program->NumAddressRegs == 1 );
977      t->address[0] = ureg_DECL_address( ureg );
978   }
979
980
981   /* Emit constants and immediates.  Mesa uses a single index space
982    * for these, so we put all the translated regs in t->constants.
983    */
984   if (program->Parameters) {
985
986      t->constants = CALLOC( program->Parameters->NumParameters,
987                             sizeof t->constants[0] );
988      if (t->constants == NULL) {
989         ret = PIPE_ERROR_OUT_OF_MEMORY;
990         goto out;
991      }
992
993      for (i = 0; i < program->Parameters->NumParameters; i++) {
994         switch (program->Parameters->Parameters[i].Type) {
995         case PROGRAM_ENV_PARAM:
996         case PROGRAM_LOCAL_PARAM:
997         case PROGRAM_STATE_VAR:
998         case PROGRAM_NAMED_PARAM:
999         case PROGRAM_UNIFORM:
1000            t->constants[i] = ureg_DECL_constant( ureg, i );
1001            break;
1002
1003            /* Emit immediates only when there is no address register
1004             * in use.  FIXME: Be smarter and recognize param arrays:
1005             * indirect addressing is only valid within the referenced
1006             * array.
1007             */
1008         case PROGRAM_CONSTANT:
1009            if (program->NumAddressRegs > 0)
1010               t->constants[i] = ureg_DECL_constant( ureg, i );
1011            else
1012               t->constants[i] =
1013                  ureg_DECL_immediate( ureg,
1014                                       program->Parameters->ParameterValues[i],
1015                                       4 );
1016            break;
1017         default:
1018            break;
1019         }
1020      }
1021   }
1022
1023   /* texture samplers */
1024   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1025      if (program->SamplersUsed & (1 << i)) {
1026         t->samplers[i] = ureg_DECL_sampler( ureg, i );
1027      }
1028   }
1029
1030   /* Emit each instruction in turn:
1031    */
1032   for (i = 0; i < program->NumInstructions; i++) {
1033      set_insn_start( t, ureg_get_instruction_number( ureg ));
1034      compile_instruction( t, &program->Instructions[i] );
1035
1036      /* note can't do that easily at the end of prog due to
1037         possible early return */
1038      if (t->prevInstWrotePsiz && program->Id) {
1039         set_insn_start( t, ureg_get_instruction_number( ureg ));
1040         ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X),
1041                   ureg_src(t->outputs[t->psizoutindex]),
1042                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1043         ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X),
1044                   ureg_src(t->outputs[t->psizoutindex]),
1045                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1046      }
1047      t->prevInstWrotePsiz = GL_FALSE;
1048   }
1049
1050   /* Fix up all emitted labels:
1051    */
1052   for (i = 0; i < t->labels_count; i++) {
1053      ureg_fixup_label( ureg,
1054                        t->labels[i].token,
1055                        t->insn[t->labels[i].branch_target] );
1056   }
1057
1058out:
1059   FREE(t->insn);
1060   FREE(t->labels);
1061   FREE(t->constants);
1062
1063   if (t->error) {
1064      debug_printf("%s: translate error flag set\n", __FUNCTION__);
1065   }
1066
1067   return ret;
1068}
1069
1070
/**
 * Release a token array through gallium's FREE().
 * Plain free() must not be used for tokens, otherwise the builtin
 * gallium malloc debugging will get confused.
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   void *mem = (void *) tokens;   /* drop const for FREE() */

   FREE(mem);
}
1080