st_mesa_to_tgsi.c revision f3b215cba2bca92d6582cc0c34702b73289f909c
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol
31 */
32
33#include "pipe/p_compiler.h"
34#include "pipe/p_shader_tokens.h"
35#include "tgsi/tgsi_parse.h"
36#include "tgsi/tgsi_build.h"
37#include "tgsi/tgsi_util.h"
38#include "tgsi/tgsi_dump.h"
39#include "tgsi/tgsi_sanity.h"
40#include "st_mesa_to_tgsi.h"
41#include "shader/prog_instruction.h"
42#include "shader/prog_parameter.h"
43#include "shader/prog_print.h"
44#include "util/u_debug.h"
45
46/*
47 * Map mesa register file to TGSI register file.
48 */
49static GLuint
50map_register_file(
51   gl_register_file file,
52   GLuint index,
53   const GLuint immediateMapping[],
54   GLboolean indirectAccess )
55{
56   switch( file ) {
57   case PROGRAM_UNDEFINED:
58      return TGSI_FILE_NULL;
59   case PROGRAM_TEMPORARY:
60      return TGSI_FILE_TEMPORARY;
61   /*case PROGRAM_LOCAL_PARAM:*/
62   /*case PROGRAM_ENV_PARAM:*/
63
64      /* Because of the longstanding problem with mesa arb shaders
65       * where constants, immediates and state variables are all
66       * bundled together as PROGRAM_STATE_VAR, we can't tell from the
67       * mesa register file whether this is a CONSTANT or an
68       * IMMEDIATE, hence we need all the other information.
69       */
70   case PROGRAM_STATE_VAR:
71   case PROGRAM_NAMED_PARAM:
72   case PROGRAM_UNIFORM:
73      if (!indirectAccess && immediateMapping && immediateMapping[index] != ~0)
74         return TGSI_FILE_IMMEDIATE;
75      else
76	 return TGSI_FILE_CONSTANT;
77   case PROGRAM_CONSTANT:
78      if (indirectAccess)
79         return TGSI_FILE_CONSTANT;
80      assert(immediateMapping[index] != ~0);
81      return TGSI_FILE_IMMEDIATE;
82   case PROGRAM_INPUT:
83      return TGSI_FILE_INPUT;
84   case PROGRAM_OUTPUT:
85      return TGSI_FILE_OUTPUT;
86   case PROGRAM_ADDRESS:
87      return TGSI_FILE_ADDRESS;
88   default:
89      assert( 0 );
90      return TGSI_FILE_NULL;
91   }
92}
93
94/**
95 * Map mesa register file index to TGSI index.
96 * Take special care when processing input and output indices.
97 * \param file  one of TGSI_FILE_x
98 * \param index  the mesa register file index
99 * \param inputMapping  maps Mesa input indexes to TGSI input indexes
100 * \param outputMapping  maps Mesa output indexes to TGSI output indexes
101 */
102static GLuint
103map_register_file_index(
104   GLuint procType,
105   GLuint file,
106   GLuint index,
107   GLuint *swizzle,
108   const GLuint inputMapping[],
109   const GLuint outputMapping[],
110   const GLuint immediateMapping[],
111   GLboolean indirectAccess )
112{
113   switch( file ) {
114   case TGSI_FILE_INPUT:
115      /* inputs are mapped according to the user-defined map */
116      return inputMapping[index];
117
118   case TGSI_FILE_OUTPUT:
119      return outputMapping[index];
120
121   case TGSI_FILE_IMMEDIATE:
122      if (indirectAccess)
123         return index;
124      assert(immediateMapping[index] != ~0);
125      return immediateMapping[index];
126
127   default:
128      return index;
129   }
130}
131
132/*
133 * Map mesa texture target to TGSI texture target.
134 */
135static GLuint
136map_texture_target(
137    GLuint textarget,
138    GLboolean shadow )
139{
140   switch( textarget ) {
141   case TEXTURE_1D_INDEX:
142      if (shadow)
143         return TGSI_TEXTURE_SHADOW1D;
144      else
145         return TGSI_TEXTURE_1D;
146   case TEXTURE_2D_INDEX:
147      if (shadow)
148         return TGSI_TEXTURE_SHADOW2D;
149      else
150         return TGSI_TEXTURE_2D;
151   case TEXTURE_3D_INDEX:
152      return TGSI_TEXTURE_3D;
153   case TEXTURE_CUBE_INDEX:
154      return TGSI_TEXTURE_CUBE;
155   case TEXTURE_RECT_INDEX:
156      if (shadow)
157         return TGSI_TEXTURE_SHADOWRECT;
158      else
159         return TGSI_TEXTURE_RECT;
160   default:
161      assert( 0 );
162   }
163
164   return TGSI_TEXTURE_1D;
165}
166
167static GLuint
168convert_sat(
169   GLuint sat )
170{
171   switch( sat ) {
172   case SATURATE_OFF:
173      return TGSI_SAT_NONE;
174   case SATURATE_ZERO_ONE:
175      return TGSI_SAT_ZERO_ONE;
176   case SATURATE_PLUS_MINUS_ONE:
177      return TGSI_SAT_MINUS_PLUS_ONE;
178   default:
179      assert( 0 );
180      return TGSI_SAT_NONE;
181   }
182}
183
184static GLuint
185convert_writemask(
186   GLuint writemask )
187{
188   assert( WRITEMASK_X == TGSI_WRITEMASK_X );
189   assert( WRITEMASK_Y == TGSI_WRITEMASK_Y );
190   assert( WRITEMASK_Z == TGSI_WRITEMASK_Z );
191   assert( WRITEMASK_W == TGSI_WRITEMASK_W );
192   assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 );
193
194   return writemask;
195}
196
197static struct tgsi_full_immediate
198make_immediate(const float *value, uint size)
199{
200   struct tgsi_full_immediate imm;
201   unsigned i;
202
203   imm = tgsi_default_full_immediate();
204   imm.Immediate.NrTokens += size;
205   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
206
207   for (i = 0; i < size; i++)
208      imm.u[i].Float = value[i];
209
210   return imm;
211}
212
213static void
214compile_instruction(
215   const struct prog_instruction *inst,
216   struct tgsi_full_instruction *fullinst,
217   const GLuint inputMapping[],
218   const GLuint outputMapping[],
219   const GLuint immediateMapping[],
220   GLboolean indirectAccess,
221   GLuint preamble_size,
222   GLuint procType,
223   GLboolean *insideSubroutine,
224   GLint wposTemp)
225{
226   GLuint i;
227   struct tgsi_full_dst_register *fulldst;
228   struct tgsi_full_src_register *fullsrc;
229
230   *fullinst = tgsi_default_full_instruction();
231
232   fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode );
233   fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode );
234   fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode );
235
236   fulldst = &fullinst->FullDstRegisters[0];
237   fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE );
238   fulldst->DstRegister.Index = map_register_file_index(
239      procType,
240      fulldst->DstRegister.File,
241      inst->DstReg.Index,
242      NULL,
243      inputMapping,
244      outputMapping,
245      NULL,
246      GL_FALSE );
247   fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );
248   if (inst->DstReg.RelAddr) {
249      fulldst->DstRegister.Indirect = 1;
250      fulldst->DstRegisterInd.File = TGSI_FILE_ADDRESS;
251      fulldst->DstRegisterInd.Index = 0;
252   }
253
254   for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
255      GLuint j;
256      GLuint swizzle = inst->SrcReg[i].Swizzle;
257
258      fullsrc = &fullinst->FullSrcRegisters[i];
259
260      if (procType == TGSI_PROCESSOR_FRAGMENT &&
261          inst->SrcReg[i].File == PROGRAM_INPUT &&
262          inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
263         /* special case of INPUT[WPOS] */
264         fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
265         fullsrc->SrcRegister.Index = wposTemp;
266      }
267      else {
268         /* any other src register */
269         fullsrc->SrcRegister.File = map_register_file(
270            inst->SrcReg[i].File,
271            inst->SrcReg[i].Index,
272            immediateMapping,
273            indirectAccess );
274         fullsrc->SrcRegister.Index = map_register_file_index(
275            procType,
276            fullsrc->SrcRegister.File,
277            inst->SrcReg[i].Index,
278            &swizzle,
279            inputMapping,
280            outputMapping,
281            immediateMapping,
282            indirectAccess );
283      }
284
285      /* swizzle (ext swizzle also depends on negation) */
286      {
287         GLuint swz[4];
288         GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE &&
289                               inst->SrcReg[i].Negate != NEGATE_XYZW);
290         for( j = 0; j < 4; j++ ) {
291            swz[j] = GET_SWZ( swizzle, j );
292            if (swz[j] > SWIZZLE_W)
293               extended = GL_TRUE;
294         }
295         if (extended) {
296            for (j = 0; j < 4; j++) {
297               tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz,
298                                                     swz[j], j);
299            }
300         }
301         else {
302            for (j = 0; j < 4; j++) {
303               tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister,
304                                                  swz[j], j);
305            }
306         }
307      }
308
309      if( inst->SrcReg[i].Negate == NEGATE_XYZW ) {
310         fullsrc->SrcRegister.Negate = 1;
311      }
312      else if( inst->SrcReg[i].Negate != NEGATE_NONE ) {
313         if( inst->SrcReg[i].Negate & NEGATE_X ) {
314            fullsrc->SrcRegisterExtSwz.NegateX = 1;
315         }
316         if( inst->SrcReg[i].Negate & NEGATE_Y ) {
317            fullsrc->SrcRegisterExtSwz.NegateY = 1;
318         }
319         if( inst->SrcReg[i].Negate & NEGATE_Z ) {
320            fullsrc->SrcRegisterExtSwz.NegateZ = 1;
321         }
322         if( inst->SrcReg[i].Negate & NEGATE_W ) {
323            fullsrc->SrcRegisterExtSwz.NegateW = 1;
324         }
325      }
326
327      if( inst->SrcReg[i].Abs ) {
328         fullsrc->SrcRegisterExtMod.Absolute = 1;
329      }
330
331      if( inst->SrcReg[i].RelAddr ) {
332         fullsrc->SrcRegister.Indirect = 1;
333
334         fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS;
335         fullsrc->SrcRegisterInd.Index = 0;
336      }
337   }
338
339   switch( inst->Opcode ) {
340   case OPCODE_ARL:
341      fullinst->Instruction.Opcode = TGSI_OPCODE_ARL;
342      break;
343   case OPCODE_ABS:
344      fullinst->Instruction.Opcode = TGSI_OPCODE_ABS;
345      break;
346   case OPCODE_ADD:
347      fullinst->Instruction.Opcode = TGSI_OPCODE_ADD;
348      break;
349   case OPCODE_BGNLOOP:
350      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP;
351      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
352      break;
353   case OPCODE_BGNSUB:
354      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB;
355      *insideSubroutine = GL_TRUE;
356      break;
357   case OPCODE_BRA:
358      fullinst->Instruction.Opcode = TGSI_OPCODE_BRA;
359      break;
360   case OPCODE_BRK:
361      fullinst->Instruction.Opcode = TGSI_OPCODE_BRK;
362      break;
363   case OPCODE_CAL:
364      fullinst->Instruction.Opcode = TGSI_OPCODE_CAL;
365      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
366      break;
367   case OPCODE_CMP:
368      fullinst->Instruction.Opcode = TGSI_OPCODE_CMP;
369      break;
370   case OPCODE_CONT:
371      fullinst->Instruction.Opcode = TGSI_OPCODE_CONT;
372      break;
373   case OPCODE_COS:
374      fullinst->Instruction.Opcode = TGSI_OPCODE_COS;
375      break;
376   case OPCODE_DDX:
377      fullinst->Instruction.Opcode = TGSI_OPCODE_DDX;
378      break;
379   case OPCODE_DDY:
380      fullinst->Instruction.Opcode = TGSI_OPCODE_DDY;
381      break;
382   case OPCODE_DP2:
383      fullinst->Instruction.Opcode = TGSI_OPCODE_DP2;
384      break;
385   case OPCODE_DP2A:
386      fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A;
387      break;
388   case OPCODE_DP3:
389      fullinst->Instruction.Opcode = TGSI_OPCODE_DP3;
390      break;
391   case OPCODE_DP4:
392      fullinst->Instruction.Opcode = TGSI_OPCODE_DP4;
393      break;
394   case OPCODE_DPH:
395      fullinst->Instruction.Opcode = TGSI_OPCODE_DPH;
396      break;
397   case OPCODE_DST:
398      fullinst->Instruction.Opcode = TGSI_OPCODE_DST;
399      break;
400   case OPCODE_ELSE:
401      fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE;
402      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
403      break;
404   case OPCODE_ENDIF:
405      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF;
406      break;
407   case OPCODE_ENDLOOP:
408      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP;
409      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
410      break;
411   case OPCODE_ENDSUB:
412      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB;
413      *insideSubroutine = GL_FALSE;
414      break;
415   case OPCODE_EX2:
416      fullinst->Instruction.Opcode = TGSI_OPCODE_EX2;
417      break;
418   case OPCODE_EXP:
419      fullinst->Instruction.Opcode = TGSI_OPCODE_EXP;
420      break;
421   case OPCODE_FLR:
422      fullinst->Instruction.Opcode = TGSI_OPCODE_FLR;
423      break;
424   case OPCODE_FRC:
425      fullinst->Instruction.Opcode = TGSI_OPCODE_FRC;
426      break;
427   case OPCODE_IF:
428      fullinst->Instruction.Opcode = TGSI_OPCODE_IF;
429      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
430      break;
431   case OPCODE_TRUNC:
432      fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC;
433      break;
434   case OPCODE_KIL:
435      /* conditional */
436      fullinst->Instruction.Opcode = TGSI_OPCODE_KIL;
437      break;
438   case OPCODE_KIL_NV:
439      /* predicated */
440      assert(inst->DstReg.CondMask == COND_TR);
441      fullinst->Instruction.Opcode = TGSI_OPCODE_KILP;
442      break;
443   case OPCODE_LG2:
444      fullinst->Instruction.Opcode = TGSI_OPCODE_LG2;
445      break;
446   case OPCODE_LOG:
447      fullinst->Instruction.Opcode = TGSI_OPCODE_LOG;
448      break;
449   case OPCODE_LIT:
450      fullinst->Instruction.Opcode = TGSI_OPCODE_LIT;
451      break;
452   case OPCODE_LRP:
453      fullinst->Instruction.Opcode = TGSI_OPCODE_LRP;
454      break;
455   case OPCODE_MAD:
456      fullinst->Instruction.Opcode = TGSI_OPCODE_MAD;
457      break;
458   case OPCODE_MAX:
459      fullinst->Instruction.Opcode = TGSI_OPCODE_MAX;
460      break;
461   case OPCODE_MIN:
462      fullinst->Instruction.Opcode = TGSI_OPCODE_MIN;
463      break;
464   case OPCODE_MOV:
465      fullinst->Instruction.Opcode = TGSI_OPCODE_MOV;
466      break;
467   case OPCODE_MUL:
468      fullinst->Instruction.Opcode = TGSI_OPCODE_MUL;
469      break;
470   case OPCODE_NOISE1:
471      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1;
472      break;
473   case OPCODE_NOISE2:
474      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2;
475      break;
476   case OPCODE_NOISE3:
477      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3;
478      break;
479   case OPCODE_NOISE4:
480      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4;
481      break;
482   case OPCODE_NOP:
483      fullinst->Instruction.Opcode = TGSI_OPCODE_NOP;
484      break;
485   case OPCODE_NRM3:
486      fullinst->Instruction.Opcode = TGSI_OPCODE_NRM;
487      break;
488   case OPCODE_NRM4:
489      fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4;
490      break;
491   case OPCODE_POW:
492      fullinst->Instruction.Opcode = TGSI_OPCODE_POW;
493      break;
494   case OPCODE_RCP:
495      fullinst->Instruction.Opcode = TGSI_OPCODE_RCP;
496      break;
497   case OPCODE_RET:
498      /* If RET is used inside main (not a real subroutine) we may want
499       * to execute END instead of RET.  TBD...
500       */
501      if (1 /*  *insideSubroutine */) {
502         fullinst->Instruction.Opcode = TGSI_OPCODE_RET;
503      }
504      else {
505         /* inside main() pseudo-function */
506         fullinst->Instruction.Opcode = TGSI_OPCODE_END;
507      }
508      break;
509   case OPCODE_RSQ:
510      fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
511      break;
512   case OPCODE_SCS:
513      fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
514      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY;
515      break;
516   case OPCODE_SEQ:
517      fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ;
518      break;
519   case OPCODE_SGE:
520      fullinst->Instruction.Opcode = TGSI_OPCODE_SGE;
521      break;
522   case OPCODE_SGT:
523      fullinst->Instruction.Opcode = TGSI_OPCODE_SGT;
524      break;
525   case OPCODE_SIN:
526      fullinst->Instruction.Opcode = TGSI_OPCODE_SIN;
527      break;
528   case OPCODE_SLE:
529      fullinst->Instruction.Opcode = TGSI_OPCODE_SLE;
530      break;
531   case OPCODE_SLT:
532      fullinst->Instruction.Opcode = TGSI_OPCODE_SLT;
533      break;
534   case OPCODE_SNE:
535      fullinst->Instruction.Opcode = TGSI_OPCODE_SNE;
536      break;
537   case OPCODE_SSG:
538      fullinst->Instruction.Opcode = TGSI_OPCODE_SSG;
539      break;
540   case OPCODE_SUB:
541      fullinst->Instruction.Opcode = TGSI_OPCODE_SUB;
542      break;
543   case OPCODE_SWZ:
544      fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ;
545      break;
546   case OPCODE_TEX:
547      /* ordinary texture lookup */
548      fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
549      fullinst->Instruction.NumSrcRegs = 2;
550      fullinst->InstructionExtTexture.Texture =
551         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
552      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
553      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
554      break;
555   case OPCODE_TXB:
556      /* texture lookup with LOD bias */
557      fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
558      fullinst->Instruction.NumSrcRegs = 2;
559      fullinst->InstructionExtTexture.Texture =
560         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
561      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
562      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
563      break;
564   case OPCODE_TXD:
565      /* texture lookup with explicit partial derivatives */
566      fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
567      fullinst->Instruction.NumSrcRegs = 4;
568      fullinst->InstructionExtTexture.Texture =
569         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
570      /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
571      fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
572      fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
573      break;
574   case OPCODE_TXL:
575      /* texture lookup with explicit LOD */
576      fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
577      fullinst->Instruction.NumSrcRegs = 2;
578      fullinst->InstructionExtTexture.Texture =
579         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
580      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
581      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
582      break;
583   case OPCODE_TXP:
584      /* texture lookup with divide by Q component */
585      /* convert to TEX w/ special flag for division */
586      fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
587      fullinst->Instruction.NumSrcRegs = 2;
588      fullinst->InstructionExtTexture.Texture =
589         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
590      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
591      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
592      break;
593   case OPCODE_XPD:
594      fullinst->Instruction.Opcode = TGSI_OPCODE_XPD;
595      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ;
596      break;
597   case OPCODE_END:
598      fullinst->Instruction.Opcode = TGSI_OPCODE_END;
599      break;
600   default:
601      assert( 0 );
602   }
603}
604
605/**
606 * \param usage_mask  bitfield of TGSI_WRITEMASK_{XYZW} tokens
607 */
608static struct tgsi_full_declaration
609make_input_decl(
610   GLuint index,
611   GLboolean interpolate_info,
612   GLuint interpolate,
613   GLuint usage_mask,
614   GLboolean semantic_info,
615   GLuint semantic_name,
616   GLbitfield semantic_index,
617   GLbitfield input_flags)
618{
619   struct tgsi_full_declaration decl;
620
621   assert(semantic_name < TGSI_SEMANTIC_COUNT);
622
623   decl = tgsi_default_full_declaration();
624   decl.Declaration.File = TGSI_FILE_INPUT;
625   decl.Declaration.UsageMask = usage_mask;
626   decl.Declaration.Semantic = semantic_info;
627   decl.DeclarationRange.First = index;
628   decl.DeclarationRange.Last = index;
629   if (semantic_info) {
630      decl.Semantic.SemanticName = semantic_name;
631      decl.Semantic.SemanticIndex = semantic_index;
632   }
633   if (interpolate_info) {
634      decl.Declaration.Interpolate = interpolate;
635   }
636   if (input_flags & PROG_PARAM_BIT_CENTROID)
637      decl.Declaration.Centroid = 1;
638   if (input_flags & PROG_PARAM_BIT_INVARIANT)
639      decl.Declaration.Invariant = 1;
640
641   return decl;
642}
643
644/**
645 * \param usage_mask  bitfield of TGSI_WRITEMASK_{XYZW} tokens
646 */
647static struct tgsi_full_declaration
648make_output_decl(
649   GLuint index,
650   GLuint semantic_name,
651   GLuint semantic_index,
652   GLuint usage_mask,
653   GLbitfield output_flags)
654{
655   struct tgsi_full_declaration decl;
656
657   assert(semantic_name < TGSI_SEMANTIC_COUNT);
658
659   decl = tgsi_default_full_declaration();
660   decl.Declaration.File = TGSI_FILE_OUTPUT;
661   decl.Declaration.UsageMask = usage_mask;
662   decl.Declaration.Semantic = 1;
663   decl.DeclarationRange.First = index;
664   decl.DeclarationRange.Last = index;
665   decl.Semantic.SemanticName = semantic_name;
666   decl.Semantic.SemanticIndex = semantic_index;
667   if (output_flags & PROG_PARAM_BIT_CENTROID)
668      decl.Declaration.Centroid = 1;
669   if (output_flags & PROG_PARAM_BIT_INVARIANT)
670      decl.Declaration.Invariant = 1;
671
672   return decl;
673}
674
675
676static struct tgsi_full_declaration
677make_temp_decl(
678   GLuint start_index,
679   GLuint end_index )
680{
681   struct tgsi_full_declaration decl;
682   decl = tgsi_default_full_declaration();
683   decl.Declaration.File = TGSI_FILE_TEMPORARY;
684   decl.DeclarationRange.First = start_index;
685   decl.DeclarationRange.Last = end_index;
686   return decl;
687}
688
689static struct tgsi_full_declaration
690make_addr_decl(
691   GLuint start_index,
692   GLuint end_index )
693{
694   struct tgsi_full_declaration decl;
695
696   decl = tgsi_default_full_declaration();
697   decl.Declaration.File = TGSI_FILE_ADDRESS;
698   decl.DeclarationRange.First = start_index;
699   decl.DeclarationRange.Last = end_index;
700   return decl;
701}
702
703static struct tgsi_full_declaration
704make_sampler_decl(GLuint index)
705{
706   struct tgsi_full_declaration decl;
707   decl = tgsi_default_full_declaration();
708   decl.Declaration.File = TGSI_FILE_SAMPLER;
709   decl.DeclarationRange.First = index;
710   decl.DeclarationRange.Last = index;
711   return decl;
712}
713
714/** Reference into a constant buffer */
715static struct tgsi_full_declaration
716make_constant_decl(GLuint first, GLuint last)
717{
718   struct tgsi_full_declaration decl;
719   decl = tgsi_default_full_declaration();
720   decl.Declaration.File = TGSI_FILE_CONSTANT;
721   decl.DeclarationRange.First = first;
722   decl.DeclarationRange.Last = last;
723   return decl;
724}
725
726
727
728/**
729 * Find the temporaries which are used in the given program.
730 */
731static void
732find_temporaries(const struct gl_program *program,
733                 GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
734{
735   GLuint i, j;
736
737   for (i = 0; i < MAX_PROGRAM_TEMPS; i++)
738      tempsUsed[i] = GL_FALSE;
739
740   for (i = 0; i < program->NumInstructions; i++) {
741      const struct prog_instruction *inst = program->Instructions + i;
742      const GLuint n = _mesa_num_inst_src_regs( inst->Opcode );
743      for (j = 0; j < n; j++) {
744         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY)
745            tempsUsed[inst->SrcReg[j].Index] = GL_TRUE;
746         if (inst->DstReg.File == PROGRAM_TEMPORARY)
747            tempsUsed[inst->DstReg.Index] = GL_TRUE;
748      }
749   }
750}
751
752
753/**
754 * Find an unused temporary in the tempsUsed array.
755 */
756static int
757find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
758{
759   int i;
760   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
761      if (!tempsUsed[i]) {
762         tempsUsed[i] = GL_TRUE;
763         return i;
764      }
765   }
766   return -1;
767}
768
769
770/** helper for building simple TGSI instruction, one src register */
771static void
772build_tgsi_instruction1(struct tgsi_full_instruction *inst,
773                        int opcode,
774                        int dstFile, int dstIndex, int writemask,
775                        int srcFile1, int srcIndex1)
776{
777   *inst = tgsi_default_full_instruction();
778
779   inst->Instruction.Opcode = opcode;
780
781   inst->Instruction.NumDstRegs = 1;
782   inst->FullDstRegisters[0].DstRegister.File = dstFile;
783   inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
784   inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
785
786   inst->Instruction.NumSrcRegs = 1;
787   inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
788   inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
789}
790
791
792/** helper for building simple TGSI instruction, two src registers */
793static void
794build_tgsi_instruction2(struct tgsi_full_instruction *inst,
795                        int opcode,
796                        int dstFile, int dstIndex, int writemask,
797                        int srcFile1, int srcIndex1,
798                        int srcFile2, int srcIndex2)
799{
800   *inst = tgsi_default_full_instruction();
801
802   inst->Instruction.Opcode = opcode;
803
804   inst->Instruction.NumDstRegs = 1;
805   inst->FullDstRegisters[0].DstRegister.File = dstFile;
806   inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
807   inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
808
809   inst->Instruction.NumSrcRegs = 2;
810   inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
811   inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
812   inst->FullSrcRegisters[1].SrcRegister.File = srcFile2;
813   inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2;
814}
815
816
817
818/**
819 * Emit the TGSI instructions for inverting the WPOS y coordinate.
820 */
821static int
822emit_inverted_wpos(struct tgsi_token *tokens,
823                   int wpos_temp,
824                   int winsize_const,
825                   int wpos_input,
826                   struct tgsi_header *header, int maxTokens)
827{
828   struct tgsi_full_instruction fullinst;
829   int ti = 0;
830
831   /* MOV wpos_temp.xzw, input[wpos]; */
832   build_tgsi_instruction1(&fullinst,
833                           TGSI_OPCODE_MOV,
834                           TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW,
835                           TGSI_FILE_INPUT, 0);
836
837   ti += tgsi_build_full_instruction(&fullinst,
838                                     &tokens[ti],
839                                     header,
840                                     maxTokens - ti);
841
842   /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */
843   build_tgsi_instruction2(&fullinst,
844                           TGSI_OPCODE_SUB,
845                           TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y,
846                           TGSI_FILE_CONSTANT, winsize_const,
847                           TGSI_FILE_INPUT, wpos_input);
848
849   ti += tgsi_build_full_instruction(&fullinst,
850                                     &tokens[ti],
851                                     header,
852                                     maxTokens - ti);
853
854   return ti;
855}
856
857
858
859
860/**
861 * Translate Mesa program to TGSI format.
862 * \param program  the program to translate
863 * \param numInputs  number of input registers used
864 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
865 *                      input indexes
866 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
867 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for each input
868 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
869
870 * \param numOutputs  number of output registers used
871 * \param outputMapping  maps Mesa fragment program outputs to TGSI
872 *                       generic outputs
873 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
874 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for each output
875 * \param tokens  array to store translated tokens in
876 * \param maxTokens  size of the tokens array
877 *
878 * \return number of tokens placed in 'tokens' buffer, or zero if error
879 */
880GLuint
881st_translate_mesa_program(
882   GLcontext *ctx,
883   uint procType,
884   const struct gl_program *program,
885   GLuint numInputs,
886   const GLuint inputMapping[],
887   const ubyte inputSemanticName[],
888   const ubyte inputSemanticIndex[],
889   const GLuint interpMode[],
890   const GLbitfield inputFlags[],
891   GLuint numOutputs,
892   const GLuint outputMapping[],
893   const ubyte outputSemanticName[],
894   const ubyte outputSemanticIndex[],
895   const GLbitfield outputFlags[],
896   struct tgsi_token *tokens,
897   GLuint maxTokens )
898{
899   GLuint i;
900   GLuint ti;  /* token index */
901   struct tgsi_header *header;
902   struct tgsi_processor *processor;
903   GLuint preamble_size = 0;
904   GLuint immediates[1000];
905   GLuint numImmediates = 0;
906   GLboolean insideSubroutine = GL_FALSE;
907   GLboolean indirectAccess = GL_FALSE;
908   GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
909   GLint wposTemp = -1, winHeightConst = -1;
910
911   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
912          procType == TGSI_PROCESSOR_VERTEX);
913
914   find_temporaries(program, tempsUsed);
915
916   if (procType == TGSI_PROCESSOR_FRAGMENT) {
917      if (program->InputsRead & FRAG_BIT_WPOS) {
918         /* Fragment program uses fragment position input.
919          * Need to replace instances of INPUT[WPOS] with temp T
920          * where T = INPUT[WPOS] by y is inverted.
921          */
922         static const gl_state_index winSizeState[STATE_LENGTH]
923            = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
924         winHeightConst = _mesa_add_state_reference(program->Parameters,
925                                                    winSizeState);
926         wposTemp = find_free_temporary(tempsUsed);
927      }
928   }
929
930
931   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
932
933   header = (struct tgsi_header *) &tokens[1];
934   *header = tgsi_build_header();
935
936   processor = (struct tgsi_processor *) &tokens[2];
937   *processor = tgsi_build_processor( procType, header );
938
939   ti = 3;
940
941   /*
942    * Declare input attributes.
943    */
944   if (procType == TGSI_PROCESSOR_FRAGMENT) {
945      for (i = 0; i < numInputs; i++) {
946         struct tgsi_full_declaration fulldecl;
947         fulldecl = make_input_decl(i,
948                                    GL_TRUE, interpMode[i],
949                                    TGSI_WRITEMASK_XYZW,
950                                    GL_TRUE, inputSemanticName[i],
951                                    inputSemanticIndex[i],
952                                    inputFlags[i]);
953         ti += tgsi_build_full_declaration(&fulldecl,
954                                           &tokens[ti],
955                                           header,
956                                           maxTokens - ti );
957      }
958   }
959   else {
960      /* vertex prog */
961      /* XXX: this could probably be merged with the clause above.
962       * the only difference is the semantic tags.
963       */
964      for (i = 0; i < numInputs; i++) {
965         struct tgsi_full_declaration fulldecl;
966         fulldecl = make_input_decl(i,
967                                    GL_FALSE, 0,
968                                    TGSI_WRITEMASK_XYZW,
969                                    GL_FALSE, 0, 0,
970                                    inputFlags[i]);
971         ti += tgsi_build_full_declaration(&fulldecl,
972                                           &tokens[ti],
973                                           header,
974                                           maxTokens - ti );
975      }
976   }
977
978   /*
979    * Declare output attributes.
980    */
981   if (procType == TGSI_PROCESSOR_FRAGMENT) {
982      for (i = 0; i < numOutputs; i++) {
983         struct tgsi_full_declaration fulldecl;
984         switch (outputSemanticName[i]) {
985         case TGSI_SEMANTIC_POSITION:
986            fulldecl = make_output_decl(i,
987                                        TGSI_SEMANTIC_POSITION, /* Z / Depth */
988                                        outputSemanticIndex[i],
989                                        TGSI_WRITEMASK_Z,
990                                        outputFlags[i]);
991            break;
992         case TGSI_SEMANTIC_COLOR:
993            fulldecl = make_output_decl(i,
994                                        TGSI_SEMANTIC_COLOR,
995                                        outputSemanticIndex[i],
996                                        TGSI_WRITEMASK_XYZW,
997                                        outputFlags[i]);
998            break;
999         default:
1000            assert(0);
1001            return 0;
1002         }
1003         ti += tgsi_build_full_declaration(&fulldecl,
1004                                           &tokens[ti],
1005                                           header,
1006                                           maxTokens - ti );
1007      }
1008   }
1009   else {
1010      /* vertex prog */
1011      for (i = 0; i < numOutputs; i++) {
1012         struct tgsi_full_declaration fulldecl;
1013         fulldecl = make_output_decl(i,
1014                                     outputSemanticName[i],
1015                                     outputSemanticIndex[i],
1016                                     TGSI_WRITEMASK_XYZW,
1017                                     outputFlags[i]);
1018         ti += tgsi_build_full_declaration(&fulldecl,
1019                                           &tokens[ti],
1020                                           header,
1021                                           maxTokens - ti );
1022      }
1023   }
1024
1025   /* temporary decls */
1026   {
1027      GLboolean inside_range = GL_FALSE;
1028      GLuint start_range = 0;
1029
1030      tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
1031      for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
1032         if (tempsUsed[i] && !inside_range) {
1033            inside_range = GL_TRUE;
1034            start_range = i;
1035         }
1036         else if (!tempsUsed[i] && inside_range) {
1037            struct tgsi_full_declaration fulldecl;
1038
1039            inside_range = GL_FALSE;
1040            fulldecl = make_temp_decl( start_range, i - 1 );
1041            ti += tgsi_build_full_declaration(
1042               &fulldecl,
1043               &tokens[ti],
1044               header,
1045               maxTokens - ti );
1046         }
1047      }
1048   }
1049
1050   /* Declare address register.
1051   */
1052   if (program->NumAddressRegs > 0) {
1053      struct tgsi_full_declaration fulldecl;
1054
1055      assert( program->NumAddressRegs == 1 );
1056
1057      fulldecl = make_addr_decl( 0, 0 );
1058      ti += tgsi_build_full_declaration(
1059         &fulldecl,
1060         &tokens[ti],
1061         header,
1062         maxTokens - ti );
1063
1064      indirectAccess = GL_TRUE;
1065   }
1066
1067   /* immediates/literals */
1068   memset(immediates, ~0, sizeof(immediates));
1069
1070   /* Emit immediates only when there is no address register in use.
1071    * FIXME: Be smarter and recognize param arrays -- indirect addressing is
1072    *        only valid within the referenced array.
1073    */
1074   if (program->Parameters && !indirectAccess) {
1075      for (i = 0; i < program->Parameters->NumParameters; i++) {
1076         if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
1077            struct tgsi_full_immediate fullimm;
1078
1079            fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
1080            ti += tgsi_build_full_immediate(
1081               &fullimm,
1082               &tokens[ti],
1083               header,
1084               maxTokens - ti );
1085            immediates[i] = numImmediates;
1086            numImmediates++;
1087         }
1088      }
1089   }
1090
1091   /* constant buffer refs */
1092   if (program->Parameters) {
1093      GLint start = -1, end = -1;
1094
1095      for (i = 0; i < program->Parameters->NumParameters; i++) {
1096         GLboolean emit = (i == program->Parameters->NumParameters - 1);
1097         GLboolean matches;
1098
1099         switch (program->Parameters->Parameters[i].Type) {
1100         case PROGRAM_ENV_PARAM:
1101         case PROGRAM_STATE_VAR:
1102         case PROGRAM_NAMED_PARAM:
1103         case PROGRAM_UNIFORM:
1104            matches = GL_TRUE;
1105            break;
1106         case PROGRAM_CONSTANT:
1107            matches = indirectAccess;
1108            break;
1109         default:
1110            matches = GL_FALSE;
1111         }
1112
1113         if (matches) {
1114            if (start == -1) {
1115               /* begin a sequence */
1116               start = i;
1117               end = i;
1118            }
1119            else {
1120               /* continue sequence */
1121               end = i;
1122            }
1123         }
1124         else {
1125            if (start != -1) {
1126               /* end of sequence */
1127               emit = GL_TRUE;
1128            }
1129         }
1130
1131         if (emit && start >= 0) {
1132            struct tgsi_full_declaration fulldecl;
1133
1134            fulldecl = make_constant_decl( start, end );
1135            ti += tgsi_build_full_declaration(
1136               &fulldecl,
1137               &tokens[ti],
1138               header,
1139               maxTokens - ti );
1140            start = end = -1;
1141         }
1142      }
1143   }
1144
1145   /* texture samplers */
1146   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1147      if (program->SamplersUsed & (1 << i)) {
1148         struct tgsi_full_declaration fulldecl;
1149
1150         fulldecl = make_sampler_decl( i );
1151         ti += tgsi_build_full_declaration(
1152            &fulldecl,
1153            &tokens[ti],
1154            header,
1155            maxTokens - ti );
1156      }
1157   }
1158
1159   /* invert WPOS fragment input */
1160   if (wposTemp >= 0) {
1161      ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
1162                               inputMapping[FRAG_ATTRIB_WPOS],
1163                               header, maxTokens - ti);
1164      preamble_size = 2; /* two instructions added */
1165   }
1166
1167   for (i = 0; i < program->NumInstructions; i++) {
1168      struct tgsi_full_instruction fullinst;
1169
1170      compile_instruction(
1171         &program->Instructions[i],
1172         &fullinst,
1173         inputMapping,
1174         outputMapping,
1175         immediates,
1176         indirectAccess,
1177         preamble_size,
1178         procType,
1179         &insideSubroutine,
1180         wposTemp);
1181
1182      ti += tgsi_build_full_instruction(
1183         &fullinst,
1184         &tokens[ti],
1185         header,
1186         maxTokens - ti );
1187   }
1188
1189#if DEBUG
1190   if(!tgsi_sanity_check(tokens)) {
1191      debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
1192      debug_printf("\nOriginal program:\n%s", program->String);
1193      debug_printf("\nMesa program:\n");
1194      _mesa_print_program(program);
1195      debug_printf("\nTGSI program:\n");
1196      tgsi_dump(tokens, 0);
1197      assert(0);
1198   }
1199#endif
1200
1201   return ti;
1202}
1203