st_mesa_to_tgsi.c revision cb90c43676c258419e4b617c908570891d3674cb
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol
31 */
32
33#include "pipe/p_compiler.h"
34#include "pipe/p_shader_tokens.h"
35#include "tgsi/tgsi_parse.h"
36#include "tgsi/tgsi_build.h"
37#include "tgsi/tgsi_util.h"
38#include "tgsi/tgsi_dump.h"
39#include "tgsi/tgsi_sanity.h"
40#include "st_mesa_to_tgsi.h"
41#include "shader/prog_instruction.h"
42#include "shader/prog_parameter.h"
43#include "shader/prog_print.h"
44#include "util/u_debug.h"
45
46/*
47 * Map mesa register file to TGSI register file.
48 */
49static GLuint
50map_register_file(
51   gl_register_file file,
52   GLuint index,
53   const GLuint immediateMapping[],
54   GLboolean indirectAccess )
55{
56   switch( file ) {
57   case PROGRAM_UNDEFINED:
58      return TGSI_FILE_NULL;
59   case PROGRAM_TEMPORARY:
60      return TGSI_FILE_TEMPORARY;
61   /*case PROGRAM_LOCAL_PARAM:*/
62   /*case PROGRAM_ENV_PARAM:*/
63
64      /* Because of the longstanding problem with mesa arb shaders
65       * where constants, immediates and state variables are all
66       * bundled together as PROGRAM_STATE_VAR, we can't tell from the
67       * mesa register file whether this is a CONSTANT or an
68       * IMMEDIATE, hence we need all the other information.
69       */
70   case PROGRAM_STATE_VAR:
71   case PROGRAM_NAMED_PARAM:
72   case PROGRAM_UNIFORM:
73      if (!indirectAccess && immediateMapping && immediateMapping[index] != ~0)
74         return TGSI_FILE_IMMEDIATE;
75      else
76	 return TGSI_FILE_CONSTANT;
77   case PROGRAM_CONSTANT:
78      if (indirectAccess)
79         return TGSI_FILE_CONSTANT;
80      assert(immediateMapping[index] != ~0);
81      return TGSI_FILE_IMMEDIATE;
82   case PROGRAM_INPUT:
83      return TGSI_FILE_INPUT;
84   case PROGRAM_OUTPUT:
85      return TGSI_FILE_OUTPUT;
86   case PROGRAM_ADDRESS:
87      return TGSI_FILE_ADDRESS;
88   default:
89      assert( 0 );
90      return TGSI_FILE_NULL;
91   }
92}
93
94/**
95 * Map mesa register file index to TGSI index.
96 * Take special care when processing input and output indices.
97 * \param file  one of TGSI_FILE_x
98 * \param index  the mesa register file index
99 * \param inputMapping  maps Mesa input indexes to TGSI input indexes
100 * \param outputMapping  maps Mesa output indexes to TGSI output indexes
101 */
102static GLuint
103map_register_file_index(
104   GLuint procType,
105   GLuint file,
106   GLuint index,
107   GLuint *swizzle,
108   const GLuint inputMapping[],
109   const GLuint outputMapping[],
110   const GLuint immediateMapping[],
111   GLboolean indirectAccess )
112{
113   switch( file ) {
114   case TGSI_FILE_INPUT:
115      if (procType == TGSI_PROCESSOR_FRAGMENT &&
116          index == FRAG_ATTRIB_FOGC) {
117         if (GET_SWZ(*swizzle, 0) == SWIZZLE_X) {
118            /* do nothing we're, ok */
119         } else if (GET_SWZ(*swizzle, 0) == SWIZZLE_Y) {
120            /* replace the swizzle with xxxx */
121            *swizzle = MAKE_SWIZZLE4(SWIZZLE_X,
122                                     SWIZZLE_X,
123                                     SWIZZLE_X,
124                                     SWIZZLE_X);
125            /* register after fog */
126            return inputMapping[index] + 1;
127         } else {
128            *swizzle = MAKE_SWIZZLE4(SWIZZLE_Z,
129                                     SWIZZLE_W,
130                                     SWIZZLE_Z,
131                                     SWIZZLE_W);
132            /* register after frontface */
133            return inputMapping[index] + 2;
134         }
135      }
136      /* inputs are mapped according to the user-defined map */
137      return inputMapping[index];
138
139   case TGSI_FILE_OUTPUT:
140      return outputMapping[index];
141
142   case TGSI_FILE_IMMEDIATE:
143      if (indirectAccess)
144         return index;
145      assert(immediateMapping[index] != ~0);
146      return immediateMapping[index];
147
148   default:
149      return index;
150   }
151}
152
153/*
154 * Map mesa texture target to TGSI texture target.
155 */
156static GLuint
157map_texture_target(
158    GLuint textarget,
159    GLboolean shadow )
160{
161   switch( textarget ) {
162   case TEXTURE_1D_INDEX:
163      if (shadow)
164         return TGSI_TEXTURE_SHADOW1D;
165      else
166         return TGSI_TEXTURE_1D;
167   case TEXTURE_2D_INDEX:
168      if (shadow)
169         return TGSI_TEXTURE_SHADOW2D;
170      else
171         return TGSI_TEXTURE_2D;
172   case TEXTURE_3D_INDEX:
173      return TGSI_TEXTURE_3D;
174   case TEXTURE_CUBE_INDEX:
175      return TGSI_TEXTURE_CUBE;
176   case TEXTURE_RECT_INDEX:
177      if (shadow)
178         return TGSI_TEXTURE_SHADOWRECT;
179      else
180         return TGSI_TEXTURE_RECT;
181   default:
182      assert( 0 );
183   }
184
185   return TGSI_TEXTURE_1D;
186}
187
188static GLuint
189convert_sat(
190   GLuint sat )
191{
192   switch( sat ) {
193   case SATURATE_OFF:
194      return TGSI_SAT_NONE;
195   case SATURATE_ZERO_ONE:
196      return TGSI_SAT_ZERO_ONE;
197   case SATURATE_PLUS_MINUS_ONE:
198      return TGSI_SAT_MINUS_PLUS_ONE;
199   default:
200      assert( 0 );
201      return TGSI_SAT_NONE;
202   }
203}
204
205static GLuint
206convert_writemask(
207   GLuint writemask )
208{
209   assert( WRITEMASK_X == TGSI_WRITEMASK_X );
210   assert( WRITEMASK_Y == TGSI_WRITEMASK_Y );
211   assert( WRITEMASK_Z == TGSI_WRITEMASK_Z );
212   assert( WRITEMASK_W == TGSI_WRITEMASK_W );
213   assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 );
214
215   return writemask;
216}
217
218static struct tgsi_full_immediate
219make_immediate(const float *value, uint size)
220{
221   struct tgsi_full_immediate imm;
222   unsigned i;
223
224   imm = tgsi_default_full_immediate();
225   imm.Immediate.NrTokens += size;
226   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
227
228   for (i = 0; i < size; i++)
229      imm.u[i].Float = value[i];
230
231   return imm;
232}
233
234static void
235compile_instruction(
236   const struct prog_instruction *inst,
237   struct tgsi_full_instruction *fullinst,
238   const GLuint inputMapping[],
239   const GLuint outputMapping[],
240   const GLuint immediateMapping[],
241   GLboolean indirectAccess,
242   GLuint preamble_size,
243   GLuint procType,
244   GLboolean *insideSubroutine,
245   GLint wposTemp)
246{
247   GLuint i;
248   struct tgsi_full_dst_register *fulldst;
249   struct tgsi_full_src_register *fullsrc;
250
251   *fullinst = tgsi_default_full_instruction();
252
253   fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode );
254   fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode );
255   fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode );
256
257   fulldst = &fullinst->FullDstRegisters[0];
258   fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE );
259   fulldst->DstRegister.Index = map_register_file_index(
260      procType,
261      fulldst->DstRegister.File,
262      inst->DstReg.Index,
263      NULL,
264      inputMapping,
265      outputMapping,
266      NULL,
267      GL_FALSE );
268   fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );
269   if (inst->DstReg.RelAddr) {
270      fulldst->DstRegister.Indirect = 1;
271      fulldst->DstRegisterInd.File = TGSI_FILE_ADDRESS;
272      fulldst->DstRegisterInd.Index = 0;
273   }
274
275   for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
276      GLuint j;
277      GLuint swizzle = inst->SrcReg[i].Swizzle;
278
279      fullsrc = &fullinst->FullSrcRegisters[i];
280
281      if (procType == TGSI_PROCESSOR_FRAGMENT &&
282          inst->SrcReg[i].File == PROGRAM_INPUT &&
283          inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
284         /* special case of INPUT[WPOS] */
285         fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
286         fullsrc->SrcRegister.Index = wposTemp;
287      }
288      else {
289         /* any other src register */
290         fullsrc->SrcRegister.File = map_register_file(
291            inst->SrcReg[i].File,
292            inst->SrcReg[i].Index,
293            immediateMapping,
294            indirectAccess );
295         fullsrc->SrcRegister.Index = map_register_file_index(
296            procType,
297            fullsrc->SrcRegister.File,
298            inst->SrcReg[i].Index,
299            &swizzle,
300            inputMapping,
301            outputMapping,
302            immediateMapping,
303            indirectAccess );
304      }
305
306      /* swizzle (ext swizzle also depends on negation) */
307      {
308         GLuint swz[4];
309         GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE &&
310                               inst->SrcReg[i].Negate != NEGATE_XYZW);
311         for( j = 0; j < 4; j++ ) {
312            swz[j] = GET_SWZ( swizzle, j );
313            if (swz[j] > SWIZZLE_W)
314               extended = GL_TRUE;
315         }
316         if (extended) {
317            for (j = 0; j < 4; j++) {
318               tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz,
319                                                     swz[j], j);
320            }
321         }
322         else {
323            for (j = 0; j < 4; j++) {
324               tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister,
325                                                  swz[j], j);
326            }
327         }
328      }
329
330      if( inst->SrcReg[i].Negate == NEGATE_XYZW ) {
331         fullsrc->SrcRegister.Negate = 1;
332      }
333      else if( inst->SrcReg[i].Negate != NEGATE_NONE ) {
334         if( inst->SrcReg[i].Negate & NEGATE_X ) {
335            fullsrc->SrcRegisterExtSwz.NegateX = 1;
336         }
337         if( inst->SrcReg[i].Negate & NEGATE_Y ) {
338            fullsrc->SrcRegisterExtSwz.NegateY = 1;
339         }
340         if( inst->SrcReg[i].Negate & NEGATE_Z ) {
341            fullsrc->SrcRegisterExtSwz.NegateZ = 1;
342         }
343         if( inst->SrcReg[i].Negate & NEGATE_W ) {
344            fullsrc->SrcRegisterExtSwz.NegateW = 1;
345         }
346      }
347
348      if( inst->SrcReg[i].Abs ) {
349         fullsrc->SrcRegisterExtMod.Absolute = 1;
350      }
351
352      if( inst->SrcReg[i].RelAddr ) {
353         fullsrc->SrcRegister.Indirect = 1;
354
355         fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS;
356         fullsrc->SrcRegisterInd.Index = 0;
357      }
358   }
359
360   switch( inst->Opcode ) {
361   case OPCODE_ARL:
362      fullinst->Instruction.Opcode = TGSI_OPCODE_ARL;
363      break;
364   case OPCODE_ABS:
365      fullinst->Instruction.Opcode = TGSI_OPCODE_ABS;
366      break;
367   case OPCODE_ADD:
368      fullinst->Instruction.Opcode = TGSI_OPCODE_ADD;
369      break;
370   case OPCODE_BGNLOOP:
371      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP;
372      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
373      break;
374   case OPCODE_BGNSUB:
375      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB;
376      *insideSubroutine = GL_TRUE;
377      break;
378   case OPCODE_BRA:
379      fullinst->Instruction.Opcode = TGSI_OPCODE_BRA;
380      break;
381   case OPCODE_BRK:
382      fullinst->Instruction.Opcode = TGSI_OPCODE_BRK;
383      break;
384   case OPCODE_CAL:
385      fullinst->Instruction.Opcode = TGSI_OPCODE_CAL;
386      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
387      break;
388   case OPCODE_CMP:
389      fullinst->Instruction.Opcode = TGSI_OPCODE_CMP;
390      break;
391   case OPCODE_CONT:
392      fullinst->Instruction.Opcode = TGSI_OPCODE_CONT;
393      break;
394   case OPCODE_COS:
395      fullinst->Instruction.Opcode = TGSI_OPCODE_COS;
396      break;
397   case OPCODE_DDX:
398      fullinst->Instruction.Opcode = TGSI_OPCODE_DDX;
399      break;
400   case OPCODE_DDY:
401      fullinst->Instruction.Opcode = TGSI_OPCODE_DDY;
402      break;
403   case OPCODE_DP2:
404      fullinst->Instruction.Opcode = TGSI_OPCODE_DP2;
405      break;
406   case OPCODE_DP2A:
407      fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A;
408      break;
409   case OPCODE_DP3:
410      fullinst->Instruction.Opcode = TGSI_OPCODE_DP3;
411      break;
412   case OPCODE_DP4:
413      fullinst->Instruction.Opcode = TGSI_OPCODE_DP4;
414      break;
415   case OPCODE_DPH:
416      fullinst->Instruction.Opcode = TGSI_OPCODE_DPH;
417      break;
418   case OPCODE_DST:
419      fullinst->Instruction.Opcode = TGSI_OPCODE_DST;
420      break;
421   case OPCODE_ELSE:
422      fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE;
423      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
424      break;
425   case OPCODE_ENDIF:
426      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF;
427      break;
428   case OPCODE_ENDLOOP:
429      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP;
430      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
431      break;
432   case OPCODE_ENDSUB:
433      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB;
434      *insideSubroutine = GL_FALSE;
435      break;
436   case OPCODE_EX2:
437      fullinst->Instruction.Opcode = TGSI_OPCODE_EX2;
438      break;
439   case OPCODE_EXP:
440      fullinst->Instruction.Opcode = TGSI_OPCODE_EXP;
441      break;
442   case OPCODE_FLR:
443      fullinst->Instruction.Opcode = TGSI_OPCODE_FLR;
444      break;
445   case OPCODE_FRC:
446      fullinst->Instruction.Opcode = TGSI_OPCODE_FRC;
447      break;
448   case OPCODE_IF:
449      fullinst->Instruction.Opcode = TGSI_OPCODE_IF;
450      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
451      break;
452   case OPCODE_TRUNC:
453      fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC;
454      break;
455   case OPCODE_KIL:
456      /* conditional */
457      fullinst->Instruction.Opcode = TGSI_OPCODE_KIL;
458      break;
459   case OPCODE_KIL_NV:
460      /* predicated */
461      assert(inst->DstReg.CondMask == COND_TR);
462      fullinst->Instruction.Opcode = TGSI_OPCODE_KILP;
463      break;
464   case OPCODE_LG2:
465      fullinst->Instruction.Opcode = TGSI_OPCODE_LG2;
466      break;
467   case OPCODE_LOG:
468      fullinst->Instruction.Opcode = TGSI_OPCODE_LOG;
469      break;
470   case OPCODE_LIT:
471      fullinst->Instruction.Opcode = TGSI_OPCODE_LIT;
472      break;
473   case OPCODE_LRP:
474      fullinst->Instruction.Opcode = TGSI_OPCODE_LRP;
475      break;
476   case OPCODE_MAD:
477      fullinst->Instruction.Opcode = TGSI_OPCODE_MAD;
478      break;
479   case OPCODE_MAX:
480      fullinst->Instruction.Opcode = TGSI_OPCODE_MAX;
481      break;
482   case OPCODE_MIN:
483      fullinst->Instruction.Opcode = TGSI_OPCODE_MIN;
484      break;
485   case OPCODE_MOV:
486      fullinst->Instruction.Opcode = TGSI_OPCODE_MOV;
487      break;
488   case OPCODE_MUL:
489      fullinst->Instruction.Opcode = TGSI_OPCODE_MUL;
490      break;
491   case OPCODE_NOISE1:
492      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1;
493      break;
494   case OPCODE_NOISE2:
495      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2;
496      break;
497   case OPCODE_NOISE3:
498      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3;
499      break;
500   case OPCODE_NOISE4:
501      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4;
502      break;
503   case OPCODE_NOP:
504      fullinst->Instruction.Opcode = TGSI_OPCODE_NOP;
505      break;
506   case OPCODE_NRM3:
507      fullinst->Instruction.Opcode = TGSI_OPCODE_NRM;
508      break;
509   case OPCODE_NRM4:
510      fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4;
511      break;
512   case OPCODE_POW:
513      fullinst->Instruction.Opcode = TGSI_OPCODE_POW;
514      break;
515   case OPCODE_RCP:
516      fullinst->Instruction.Opcode = TGSI_OPCODE_RCP;
517      break;
518   case OPCODE_RET:
519      /* If RET is used inside main (not a real subroutine) we may want
520       * to execute END instead of RET.  TBD...
521       */
522      if (1 /*  *insideSubroutine */) {
523         fullinst->Instruction.Opcode = TGSI_OPCODE_RET;
524      }
525      else {
526         /* inside main() pseudo-function */
527         fullinst->Instruction.Opcode = TGSI_OPCODE_END;
528      }
529      break;
530   case OPCODE_RSQ:
531      fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
532      break;
533   case OPCODE_SCS:
534      fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
535      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY;
536      break;
537   case OPCODE_SEQ:
538      fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ;
539      break;
540   case OPCODE_SGE:
541      fullinst->Instruction.Opcode = TGSI_OPCODE_SGE;
542      break;
543   case OPCODE_SGT:
544      fullinst->Instruction.Opcode = TGSI_OPCODE_SGT;
545      break;
546   case OPCODE_SIN:
547      fullinst->Instruction.Opcode = TGSI_OPCODE_SIN;
548      break;
549   case OPCODE_SLE:
550      fullinst->Instruction.Opcode = TGSI_OPCODE_SLE;
551      break;
552   case OPCODE_SLT:
553      fullinst->Instruction.Opcode = TGSI_OPCODE_SLT;
554      break;
555   case OPCODE_SNE:
556      fullinst->Instruction.Opcode = TGSI_OPCODE_SNE;
557      break;
558   case OPCODE_SSG:
559      fullinst->Instruction.Opcode = TGSI_OPCODE_SSG;
560      break;
561   case OPCODE_SUB:
562      fullinst->Instruction.Opcode = TGSI_OPCODE_SUB;
563      break;
564   case OPCODE_SWZ:
565      fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ;
566      break;
567   case OPCODE_TEX:
568      /* ordinary texture lookup */
569      fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
570      fullinst->Instruction.NumSrcRegs = 2;
571      fullinst->InstructionExtTexture.Texture =
572         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
573      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
574      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
575      break;
576   case OPCODE_TXB:
577      /* texture lookup with LOD bias */
578      fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
579      fullinst->Instruction.NumSrcRegs = 2;
580      fullinst->InstructionExtTexture.Texture =
581         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
582      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
583      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
584      break;
585   case OPCODE_TXD:
586      /* texture lookup with explicit partial derivatives */
587      fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
588      fullinst->Instruction.NumSrcRegs = 4;
589      fullinst->InstructionExtTexture.Texture =
590         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
591      /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
592      fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
593      fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
594      break;
595   case OPCODE_TXL:
596      /* texture lookup with explicit LOD */
597      fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
598      fullinst->Instruction.NumSrcRegs = 2;
599      fullinst->InstructionExtTexture.Texture =
600         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
601      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
602      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
603      break;
604   case OPCODE_TXP:
605      /* texture lookup with divide by Q component */
606      /* convert to TEX w/ special flag for division */
607      fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
608      fullinst->Instruction.NumSrcRegs = 2;
609      fullinst->InstructionExtTexture.Texture =
610         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
611      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
612      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
613      break;
614   case OPCODE_XPD:
615      fullinst->Instruction.Opcode = TGSI_OPCODE_XPD;
616      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ;
617      break;
618   case OPCODE_END:
619      fullinst->Instruction.Opcode = TGSI_OPCODE_END;
620      break;
621   default:
622      assert( 0 );
623   }
624}
625
626/**
627 * \param usage_mask  bitfield of TGSI_WRITEMASK_{XYZW} tokens
628 */
629static struct tgsi_full_declaration
630make_input_decl(
631   GLuint index,
632   GLboolean interpolate_info,
633   GLuint interpolate,
634   GLuint usage_mask,
635   GLboolean semantic_info,
636   GLuint semantic_name,
637   GLbitfield semantic_index,
638   GLbitfield input_flags)
639{
640   struct tgsi_full_declaration decl;
641
642   assert(semantic_name < TGSI_SEMANTIC_COUNT);
643
644   decl = tgsi_default_full_declaration();
645   decl.Declaration.File = TGSI_FILE_INPUT;
646   decl.Declaration.UsageMask = usage_mask;
647   decl.Declaration.Semantic = semantic_info;
648   decl.DeclarationRange.First = index;
649   decl.DeclarationRange.Last = index;
650   if (semantic_info) {
651      decl.Semantic.SemanticName = semantic_name;
652      decl.Semantic.SemanticIndex = semantic_index;
653   }
654   if (interpolate_info) {
655      decl.Declaration.Interpolate = interpolate;
656   }
657   if (input_flags & PROG_PARAM_BIT_CENTROID)
658      decl.Declaration.Centroid = 1;
659   if (input_flags & PROG_PARAM_BIT_INVARIANT)
660      decl.Declaration.Invariant = 1;
661
662   return decl;
663}
664
665/**
666 * \param usage_mask  bitfield of TGSI_WRITEMASK_{XYZW} tokens
667 */
668static struct tgsi_full_declaration
669make_output_decl(
670   GLuint index,
671   GLuint semantic_name,
672   GLuint semantic_index,
673   GLuint usage_mask,
674   GLbitfield output_flags)
675{
676   struct tgsi_full_declaration decl;
677
678   assert(semantic_name < TGSI_SEMANTIC_COUNT);
679
680   decl = tgsi_default_full_declaration();
681   decl.Declaration.File = TGSI_FILE_OUTPUT;
682   decl.Declaration.UsageMask = usage_mask;
683   decl.Declaration.Semantic = 1;
684   decl.DeclarationRange.First = index;
685   decl.DeclarationRange.Last = index;
686   decl.Semantic.SemanticName = semantic_name;
687   decl.Semantic.SemanticIndex = semantic_index;
688   if (output_flags & PROG_PARAM_BIT_CENTROID)
689      decl.Declaration.Centroid = 1;
690   if (output_flags & PROG_PARAM_BIT_INVARIANT)
691      decl.Declaration.Invariant = 1;
692
693   return decl;
694}
695
696
697static struct tgsi_full_declaration
698make_temp_decl(
699   GLuint start_index,
700   GLuint end_index )
701{
702   struct tgsi_full_declaration decl;
703   decl = tgsi_default_full_declaration();
704   decl.Declaration.File = TGSI_FILE_TEMPORARY;
705   decl.DeclarationRange.First = start_index;
706   decl.DeclarationRange.Last = end_index;
707   return decl;
708}
709
710static struct tgsi_full_declaration
711make_addr_decl(
712   GLuint start_index,
713   GLuint end_index )
714{
715   struct tgsi_full_declaration decl;
716
717   decl = tgsi_default_full_declaration();
718   decl.Declaration.File = TGSI_FILE_ADDRESS;
719   decl.DeclarationRange.First = start_index;
720   decl.DeclarationRange.Last = end_index;
721   return decl;
722}
723
724static struct tgsi_full_declaration
725make_sampler_decl(GLuint index)
726{
727   struct tgsi_full_declaration decl;
728   decl = tgsi_default_full_declaration();
729   decl.Declaration.File = TGSI_FILE_SAMPLER;
730   decl.DeclarationRange.First = index;
731   decl.DeclarationRange.Last = index;
732   return decl;
733}
734
735/** Reference into a constant buffer */
736static struct tgsi_full_declaration
737make_constant_decl(GLuint first, GLuint last)
738{
739   struct tgsi_full_declaration decl;
740   decl = tgsi_default_full_declaration();
741   decl.Declaration.File = TGSI_FILE_CONSTANT;
742   decl.DeclarationRange.First = first;
743   decl.DeclarationRange.Last = last;
744   return decl;
745}
746
747
748
749/**
750 * Find the temporaries which are used in the given program.
751 */
752static void
753find_temporaries(const struct gl_program *program,
754                 GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
755{
756   GLuint i, j;
757
758   for (i = 0; i < MAX_PROGRAM_TEMPS; i++)
759      tempsUsed[i] = GL_FALSE;
760
761   for (i = 0; i < program->NumInstructions; i++) {
762      const struct prog_instruction *inst = program->Instructions + i;
763      const GLuint n = _mesa_num_inst_src_regs( inst->Opcode );
764      for (j = 0; j < n; j++) {
765         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY)
766            tempsUsed[inst->SrcReg[j].Index] = GL_TRUE;
767         if (inst->DstReg.File == PROGRAM_TEMPORARY)
768            tempsUsed[inst->DstReg.Index] = GL_TRUE;
769      }
770   }
771}
772
773
774/**
775 * Find an unused temporary in the tempsUsed array.
776 */
777static int
778find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
779{
780   int i;
781   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
782      if (!tempsUsed[i]) {
783         tempsUsed[i] = GL_TRUE;
784         return i;
785      }
786   }
787   return -1;
788}
789
790
791/** helper for building simple TGSI instruction, one src register */
792static void
793build_tgsi_instruction1(struct tgsi_full_instruction *inst,
794                        int opcode,
795                        int dstFile, int dstIndex, int writemask,
796                        int srcFile1, int srcIndex1)
797{
798   *inst = tgsi_default_full_instruction();
799
800   inst->Instruction.Opcode = opcode;
801
802   inst->Instruction.NumDstRegs = 1;
803   inst->FullDstRegisters[0].DstRegister.File = dstFile;
804   inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
805   inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
806
807   inst->Instruction.NumSrcRegs = 1;
808   inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
809   inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
810}
811
812
813/** helper for building simple TGSI instruction, two src registers */
814static void
815build_tgsi_instruction2(struct tgsi_full_instruction *inst,
816                        int opcode,
817                        int dstFile, int dstIndex, int writemask,
818                        int srcFile1, int srcIndex1,
819                        int srcFile2, int srcIndex2)
820{
821   *inst = tgsi_default_full_instruction();
822
823   inst->Instruction.Opcode = opcode;
824
825   inst->Instruction.NumDstRegs = 1;
826   inst->FullDstRegisters[0].DstRegister.File = dstFile;
827   inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
828   inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
829
830   inst->Instruction.NumSrcRegs = 2;
831   inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
832   inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
833   inst->FullSrcRegisters[1].SrcRegister.File = srcFile2;
834   inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2;
835}
836
837
838
839/**
840 * Emit the TGSI instructions for inverting the WPOS y coordinate.
841 */
842static int
843emit_inverted_wpos(struct tgsi_token *tokens,
844                   int wpos_temp,
845                   int winsize_const,
846                   int wpos_input,
847                   struct tgsi_header *header, int maxTokens)
848{
849   struct tgsi_full_instruction fullinst;
850   int ti = 0;
851
852   /* MOV wpos_temp.xzw, input[wpos]; */
853   build_tgsi_instruction1(&fullinst,
854                           TGSI_OPCODE_MOV,
855                           TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW,
856                           TGSI_FILE_INPUT, 0);
857
858   ti += tgsi_build_full_instruction(&fullinst,
859                                     &tokens[ti],
860                                     header,
861                                     maxTokens - ti);
862
863   /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */
864   build_tgsi_instruction2(&fullinst,
865                           TGSI_OPCODE_SUB,
866                           TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y,
867                           TGSI_FILE_CONSTANT, winsize_const,
868                           TGSI_FILE_INPUT, wpos_input);
869
870   ti += tgsi_build_full_instruction(&fullinst,
871                                     &tokens[ti],
872                                     header,
873                                     maxTokens - ti);
874
875   return ti;
876}
877
878
879
880
881/**
882 * Translate Mesa program to TGSI format.
883 * \param program  the program to translate
884 * \param numInputs  number of input registers used
885 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
886 *                      input indexes
887 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
888 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for each input
889 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
890
891 * \param numOutputs  number of output registers used
892 * \param outputMapping  maps Mesa fragment program outputs to TGSI
893 *                       generic outputs
894 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
895 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for each output
896 * \param tokens  array to store translated tokens in
897 * \param maxTokens  size of the tokens array
898 *
899 * \return number of tokens placed in 'tokens' buffer, or zero if error
900 */
901GLuint
902st_translate_mesa_program(
903   GLcontext *ctx,
904   uint procType,
905   const struct gl_program *program,
906   GLuint numInputs,
907   const GLuint inputMapping[],
908   const ubyte inputSemanticName[],
909   const ubyte inputSemanticIndex[],
910   const GLuint interpMode[],
911   const GLbitfield inputFlags[],
912   GLuint numOutputs,
913   const GLuint outputMapping[],
914   const ubyte outputSemanticName[],
915   const ubyte outputSemanticIndex[],
916   const GLbitfield outputFlags[],
917   struct tgsi_token *tokens,
918   GLuint maxTokens )
919{
920   GLuint i;
921   GLuint ti;  /* token index */
922   struct tgsi_header *header;
923   struct tgsi_processor *processor;
924   GLuint preamble_size = 0;
925   GLuint immediates[1000];
926   GLuint numImmediates = 0;
927   GLboolean insideSubroutine = GL_FALSE;
928   GLboolean indirectAccess = GL_FALSE;
929   GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
930   GLint wposTemp = -1, winHeightConst = -1;
931
932   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
933          procType == TGSI_PROCESSOR_VERTEX);
934
935   find_temporaries(program, tempsUsed);
936
937   if (procType == TGSI_PROCESSOR_FRAGMENT) {
938      if (program->InputsRead & FRAG_BIT_WPOS) {
939         /* Fragment program uses fragment position input.
940          * Need to replace instances of INPUT[WPOS] with temp T
941          * where T = INPUT[WPOS] by y is inverted.
942          */
943         static const gl_state_index winSizeState[STATE_LENGTH]
944            = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
945         winHeightConst = _mesa_add_state_reference(program->Parameters,
946                                                    winSizeState);
947         wposTemp = find_free_temporary(tempsUsed);
948      }
949   }
950
951
952   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
953
954   header = (struct tgsi_header *) &tokens[1];
955   *header = tgsi_build_header();
956
957   processor = (struct tgsi_processor *) &tokens[2];
958   *processor = tgsi_build_processor( procType, header );
959
960   ti = 3;
961
962   /*
963    * Declare input attributes.
964    */
965   if (procType == TGSI_PROCESSOR_FRAGMENT) {
966      for (i = 0; i < numInputs; i++) {
967         struct tgsi_full_declaration fulldecl;
968         fulldecl = make_input_decl(i,
969                                    GL_TRUE, interpMode[i],
970                                    TGSI_WRITEMASK_XYZW,
971                                    GL_TRUE, inputSemanticName[i],
972                                    inputSemanticIndex[i],
973                                    inputFlags[i]);
974         ti += tgsi_build_full_declaration(&fulldecl,
975                                           &tokens[ti],
976                                           header,
977                                           maxTokens - ti );
978      }
979   }
980   else {
981      /* vertex prog */
982      /* XXX: this could probaby be merged with the clause above.
983       * the only difference is the semantic tags.
984       */
985      for (i = 0; i < numInputs; i++) {
986         struct tgsi_full_declaration fulldecl;
987         fulldecl = make_input_decl(i,
988                                    GL_FALSE, 0,
989                                    TGSI_WRITEMASK_XYZW,
990                                    GL_FALSE, 0, 0,
991                                    inputFlags[i]);
992         ti += tgsi_build_full_declaration(&fulldecl,
993                                           &tokens[ti],
994                                           header,
995                                           maxTokens - ti );
996      }
997   }
998
999   /*
1000    * Declare output attributes.
1001    */
1002   if (procType == TGSI_PROCESSOR_FRAGMENT) {
1003      for (i = 0; i < numOutputs; i++) {
1004         struct tgsi_full_declaration fulldecl;
1005         switch (outputSemanticName[i]) {
1006         case TGSI_SEMANTIC_POSITION:
1007            fulldecl = make_output_decl(i,
1008                                        TGSI_SEMANTIC_POSITION, /* Z / Depth */
1009                                        outputSemanticIndex[i],
1010                                        TGSI_WRITEMASK_Z,
1011                                        outputFlags[i]);
1012            break;
1013         case TGSI_SEMANTIC_COLOR:
1014            fulldecl = make_output_decl(i,
1015                                        TGSI_SEMANTIC_COLOR,
1016                                        outputSemanticIndex[i],
1017                                        TGSI_WRITEMASK_XYZW,
1018                                        outputFlags[i]);
1019            break;
1020         default:
1021            assert(0);
1022            return 0;
1023         }
1024         ti += tgsi_build_full_declaration(&fulldecl,
1025                                           &tokens[ti],
1026                                           header,
1027                                           maxTokens - ti );
1028      }
1029   }
1030   else {
1031      /* vertex prog */
1032      for (i = 0; i < numOutputs; i++) {
1033         struct tgsi_full_declaration fulldecl;
1034         fulldecl = make_output_decl(i,
1035                                     outputSemanticName[i],
1036                                     outputSemanticIndex[i],
1037                                     TGSI_WRITEMASK_XYZW,
1038                                     outputFlags[i]);
1039         ti += tgsi_build_full_declaration(&fulldecl,
1040                                           &tokens[ti],
1041                                           header,
1042                                           maxTokens - ti );
1043      }
1044   }
1045
1046   /* temporary decls */
1047   {
1048      GLboolean inside_range = GL_FALSE;
1049      GLuint start_range = 0;
1050
1051      tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
1052      for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
1053         if (tempsUsed[i] && !inside_range) {
1054            inside_range = GL_TRUE;
1055            start_range = i;
1056         }
1057         else if (!tempsUsed[i] && inside_range) {
1058            struct tgsi_full_declaration fulldecl;
1059
1060            inside_range = GL_FALSE;
1061            fulldecl = make_temp_decl( start_range, i - 1 );
1062            ti += tgsi_build_full_declaration(
1063               &fulldecl,
1064               &tokens[ti],
1065               header,
1066               maxTokens - ti );
1067         }
1068      }
1069   }
1070
1071   /* Declare address register.
1072   */
1073   if (program->NumAddressRegs > 0) {
1074      struct tgsi_full_declaration fulldecl;
1075
1076      assert( program->NumAddressRegs == 1 );
1077
1078      fulldecl = make_addr_decl( 0, 0 );
1079      ti += tgsi_build_full_declaration(
1080         &fulldecl,
1081         &tokens[ti],
1082         header,
1083         maxTokens - ti );
1084
1085      indirectAccess = GL_TRUE;
1086   }
1087
1088   /* immediates/literals */
1089   memset(immediates, ~0, sizeof(immediates));
1090
1091   /* Emit immediates only when there is no address register in use.
1092    * FIXME: Be smarter and recognize param arrays -- indirect addressing is
1093    *        only valid within the referenced array.
1094    */
1095   if (program->Parameters && !indirectAccess) {
1096      for (i = 0; i < program->Parameters->NumParameters; i++) {
1097         if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
1098            struct tgsi_full_immediate fullimm;
1099
1100            fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
1101            ti += tgsi_build_full_immediate(
1102               &fullimm,
1103               &tokens[ti],
1104               header,
1105               maxTokens - ti );
1106            immediates[i] = numImmediates;
1107            numImmediates++;
1108         }
1109      }
1110   }
1111
1112   /* constant buffer refs */
1113   if (program->Parameters) {
1114      GLint start = -1, end = -1;
1115
1116      for (i = 0; i < program->Parameters->NumParameters; i++) {
1117         GLboolean emit = (i == program->Parameters->NumParameters - 1);
1118         GLboolean matches;
1119
1120         switch (program->Parameters->Parameters[i].Type) {
1121         case PROGRAM_ENV_PARAM:
1122         case PROGRAM_STATE_VAR:
1123         case PROGRAM_NAMED_PARAM:
1124         case PROGRAM_UNIFORM:
1125            matches = GL_TRUE;
1126            break;
1127         case PROGRAM_CONSTANT:
1128            matches = indirectAccess;
1129            break;
1130         default:
1131            matches = GL_FALSE;
1132         }
1133
1134         if (matches) {
1135            if (start == -1) {
1136               /* begin a sequence */
1137               start = i;
1138               end = i;
1139            }
1140            else {
1141               /* continue sequence */
1142               end = i;
1143            }
1144         }
1145         else {
1146            if (start != -1) {
1147               /* end of sequence */
1148               emit = GL_TRUE;
1149            }
1150         }
1151
1152         if (emit && start >= 0) {
1153            struct tgsi_full_declaration fulldecl;
1154
1155            fulldecl = make_constant_decl( start, end );
1156            ti += tgsi_build_full_declaration(
1157               &fulldecl,
1158               &tokens[ti],
1159               header,
1160               maxTokens - ti );
1161            start = end = -1;
1162         }
1163      }
1164   }
1165
1166   /* texture samplers */
1167   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1168      if (program->SamplersUsed & (1 << i)) {
1169         struct tgsi_full_declaration fulldecl;
1170
1171         fulldecl = make_sampler_decl( i );
1172         ti += tgsi_build_full_declaration(
1173            &fulldecl,
1174            &tokens[ti],
1175            header,
1176            maxTokens - ti );
1177      }
1178   }
1179
1180   /* invert WPOS fragment input */
1181   if (wposTemp >= 0) {
1182      ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
1183                               inputMapping[FRAG_ATTRIB_WPOS],
1184                               header, maxTokens - ti);
1185      preamble_size = 2; /* two instructions added */
1186   }
1187
1188   for (i = 0; i < program->NumInstructions; i++) {
1189      struct tgsi_full_instruction fullinst;
1190
1191      compile_instruction(
1192         &program->Instructions[i],
1193         &fullinst,
1194         inputMapping,
1195         outputMapping,
1196         immediates,
1197         indirectAccess,
1198         preamble_size,
1199         procType,
1200         &insideSubroutine,
1201         wposTemp);
1202
1203      ti += tgsi_build_full_instruction(
1204         &fullinst,
1205         &tokens[ti],
1206         header,
1207         maxTokens - ti );
1208   }
1209
1210#if DEBUG
1211   if(!tgsi_sanity_check(tokens)) {
1212      debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
1213      debug_printf("\nOriginal program:\n%s", program->String);
1214      debug_printf("\nMesa program:\n");
1215      _mesa_print_program(program);
1216      debug_printf("\nTGSI program:\n");
1217      tgsi_dump(tokens, 0);
1218      assert(0);
1219   }
1220#endif
1221
1222   return ti;
1223}
1224