st_glsl_to_tgsi.cpp revision 39348bf79fb247eec895c93e52f23afe138be46a
1/*
2 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
3 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27/**
 * \file st_glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33#include <stdio.h>
34#include "main/compiler.h"
35#include "ir.h"
36#include "ir_visitor.h"
37#include "ir_print_visitor.h"
38#include "ir_expression_flattening.h"
39#include "glsl_types.h"
40#include "glsl_parser_extras.h"
41#include "../glsl/program.h"
42#include "ir_optimization.h"
43#include "ast.h"
44
45extern "C" {
46#include "main/mtypes.h"
47#include "main/shaderapi.h"
48#include "main/shaderobj.h"
49#include "main/uniforms.h"
50#include "program/hash_table.h"
51#include "program/prog_instruction.h"
52#include "program/prog_optimize.h"
53#include "program/prog_print.h"
54#include "program/program.h"
55#include "program/prog_uniform.h"
56#include "program/prog_parameter.h"
57#include "program/sampler.h"
58
59#include "pipe/p_compiler.h"
60#include "pipe/p_context.h"
61#include "pipe/p_screen.h"
62#include "pipe/p_shader_tokens.h"
63#include "pipe/p_state.h"
64#include "util/u_math.h"
65#include "tgsi/tgsi_ureg.h"
66#include "tgsi/tgsi_info.h"
67#include "st_context.h"
68#include "st_program.h"
69#include "st_glsl_to_tgsi.h"
70#include "st_mesa_to_tgsi.h"
71}
72
/**
 * Register-file value this translator uses for immediates.  It is defined
 * as PROGRAM_FILE_MAX rather than as one of the other PROGRAM_* files.
 */
#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX

/**
 * Bitmask with one bit (1 << PROGRAM_*) set for each of the parameter,
 * state, constant and uniform register files.
 */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                           (1 << PROGRAM_ENV_PARAM) |    \
                           (1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_NAMED_PARAM) |  \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/* NOTE(review): presumably the maximum number of temporaries the later
 * passes in this file can track -- confirm against the users of this macro.
 */
#define MAX_TEMPS         4096

/* Number of entries in glsl_to_tgsi_instruction::tex_offsets.
 * will be 4 for GLSL 4.00 */
#define MAX_GLSL_TEXTURE_OFFSET 1

/* The two register classes refer to each other in their converting
 * constructors, so both are forward declared here. */
class st_src_reg;
class st_dst_reg;

/* Defined further down; needed by the st_src_reg constructor. */
static int swizzle_for_size(int size);
90
91/**
92 * This struct is a corresponding struct to TGSI ureg_src.
93 */
94class st_src_reg {
95public:
96   st_src_reg(gl_register_file file, int index, const glsl_type *type)
97   {
98      this->file = file;
99      this->index = index;
100      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
101         this->swizzle = swizzle_for_size(type->vector_elements);
102      else
103         this->swizzle = SWIZZLE_XYZW;
104      this->negate = 0;
105      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
106      this->reladdr = NULL;
107   }
108
109   st_src_reg(gl_register_file file, int index, int type)
110   {
111      this->type = type;
112      this->file = file;
113      this->index = index;
114      this->swizzle = SWIZZLE_XYZW;
115      this->negate = 0;
116      this->reladdr = NULL;
117   }
118
119   st_src_reg()
120   {
121      this->type = GLSL_TYPE_ERROR;
122      this->file = PROGRAM_UNDEFINED;
123      this->index = 0;
124      this->swizzle = 0;
125      this->negate = 0;
126      this->reladdr = NULL;
127   }
128
129   explicit st_src_reg(st_dst_reg reg);
130
131   gl_register_file file; /**< PROGRAM_* from Mesa */
132   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
133   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
134   int negate; /**< NEGATE_XYZW mask from mesa */
135   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
136   /** Register index should be offset by the integer in this reg. */
137   st_src_reg *reladdr;
138};
139
140class st_dst_reg {
141public:
142   st_dst_reg(gl_register_file file, int writemask, int type)
143   {
144      this->file = file;
145      this->index = 0;
146      this->writemask = writemask;
147      this->cond_mask = COND_TR;
148      this->reladdr = NULL;
149      this->type = type;
150   }
151
152   st_dst_reg()
153   {
154      this->type = GLSL_TYPE_ERROR;
155      this->file = PROGRAM_UNDEFINED;
156      this->index = 0;
157      this->writemask = 0;
158      this->cond_mask = COND_TR;
159      this->reladdr = NULL;
160   }
161
162   explicit st_dst_reg(st_src_reg reg);
163
164   gl_register_file file; /**< PROGRAM_* from Mesa */
165   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
166   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
167   GLuint cond_mask:4;
168   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
169   /** Register index should be offset by the integer in this reg. */
170   st_src_reg *reladdr;
171};
172
173st_src_reg::st_src_reg(st_dst_reg reg)
174{
175   this->type = reg.type;
176   this->file = reg.file;
177   this->index = reg.index;
178   this->swizzle = SWIZZLE_XYZW;
179   this->negate = 0;
180   this->reladdr = reg.reladdr;
181}
182
183st_dst_reg::st_dst_reg(st_src_reg reg)
184{
185   this->type = reg.type;
186   this->file = reg.file;
187   this->index = reg.index;
188   this->writemask = WRITEMASK_XYZW;
189   this->cond_mask = COND_TR;
190   this->reladdr = reg.reladdr;
191}
192
193class glsl_to_tgsi_instruction : public exec_node {
194public:
195   /* Callers of this ralloc-based new need not call delete. It's
196    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
197   static void* operator new(size_t size, void *ctx)
198   {
199      void *node;
200
201      node = rzalloc_size(ctx, size);
202      assert(node != NULL);
203
204      return node;
205   }
206
207   unsigned op;
208   st_dst_reg dst;
209   st_src_reg src[3];
210   /** Pointer to the ir source this tree came from for debugging */
211   ir_instruction *ir;
212   GLboolean cond_update;
213   bool saturate;
214   int sampler; /**< sampler index */
215   int tex_target; /**< One of TEXTURE_*_INDEX */
216   GLboolean tex_shadow;
217   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
218   unsigned tex_offset_num_offset;
219   int dead_mask; /**< Used in dead code elimination */
220
221   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
222};
223
224class variable_storage : public exec_node {
225public:
226   variable_storage(ir_variable *var, gl_register_file file, int index)
227      : file(file), index(index), var(var)
228   {
229      /* empty */
230   }
231
232   gl_register_file file;
233   int index;
234   ir_variable *var; /* variable that maps to this, if any */
235};
236
237class immediate_storage : public exec_node {
238public:
239   immediate_storage(gl_constant_value *values, int size, int type)
240   {
241      memcpy(this->values, values, size * sizeof(gl_constant_value));
242      this->size = size;
243      this->type = type;
244   }
245
246   gl_constant_value values[4];
247   int size; /**< Number of components (1-4) */
248   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
249};
250
/**
 * Bookkeeping for one GLSL function signature.  Entries live in
 * glsl_to_tgsi_visitor::function_signatures.
 */
class function_entry : public exec_node {
public:
   /** The IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};
282
283class glsl_to_tgsi_visitor : public ir_visitor {
284public:
285   glsl_to_tgsi_visitor();
286   ~glsl_to_tgsi_visitor();
287
288   function_entry *current_function;
289
290   struct gl_context *ctx;
291   struct gl_program *prog;
292   struct gl_shader_program *shader_program;
293   struct gl_shader_compiler_options *options;
294
295   int next_temp;
296
297   int num_address_regs;
298   int samplers_used;
299   bool indirect_addr_temps;
300   bool indirect_addr_consts;
301
302   int glsl_version;
303   bool native_integers;
304
305   variable_storage *find_variable_storage(ir_variable *var);
306
307   int add_constant(gl_register_file file, gl_constant_value values[4],
308                    int size, int datatype, GLuint *swizzle_out);
309
310   function_entry *get_function_signature(ir_function_signature *sig);
311
312   st_src_reg get_temp(const glsl_type *type);
313   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
314
315   st_src_reg st_src_reg_for_float(float val);
316   st_src_reg st_src_reg_for_int(int val);
317   st_src_reg st_src_reg_for_type(int type, int val);
318
319   /**
320    * \name Visit methods
321    *
322    * As typical for the visitor pattern, there must be one \c visit method for
323    * each concrete subclass of \c ir_instruction.  Virtual base classes within
324    * the hierarchy should not have \c visit methods.
325    */
326   /*@{*/
327   virtual void visit(ir_variable *);
328   virtual void visit(ir_loop *);
329   virtual void visit(ir_loop_jump *);
330   virtual void visit(ir_function_signature *);
331   virtual void visit(ir_function *);
332   virtual void visit(ir_expression *);
333   virtual void visit(ir_swizzle *);
334   virtual void visit(ir_dereference_variable  *);
335   virtual void visit(ir_dereference_array *);
336   virtual void visit(ir_dereference_record *);
337   virtual void visit(ir_assignment *);
338   virtual void visit(ir_constant *);
339   virtual void visit(ir_call *);
340   virtual void visit(ir_return *);
341   virtual void visit(ir_discard *);
342   virtual void visit(ir_texture *);
343   virtual void visit(ir_if *);
344   /*@}*/
345
346   st_src_reg result;
347
348   /** List of variable_storage */
349   exec_list variables;
350
351   /** List of immediate_storage */
352   exec_list immediates;
353   int num_immediates;
354
355   /** List of function_entry */
356   exec_list function_signatures;
357   int next_signature_id;
358
359   /** List of glsl_to_tgsi_instruction */
360   exec_list instructions;
361
362   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
363
364   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
365        		        st_dst_reg dst, st_src_reg src0);
366
367   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
368        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
369
370   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
371        		        st_dst_reg dst,
372        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
373
374   unsigned get_opcode(ir_instruction *ir, unsigned op,
375                    st_dst_reg dst,
376                    st_src_reg src0, st_src_reg src1);
377
378   /**
379    * Emit the correct dot-product instruction for the type of arguments
380    */
381   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
382                                     st_dst_reg dst,
383                                     st_src_reg src0,
384                                     st_src_reg src1,
385                                     unsigned elements);
386
387   void emit_scalar(ir_instruction *ir, unsigned op,
388        	    st_dst_reg dst, st_src_reg src0);
389
390   void emit_scalar(ir_instruction *ir, unsigned op,
391        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
392
393   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
394
395   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
396
397   void emit_scs(ir_instruction *ir, unsigned op,
398        	 st_dst_reg dst, const st_src_reg &src);
399
400   bool try_emit_mad(ir_expression *ir,
401              int mul_operand);
402   bool try_emit_mad_for_and_not(ir_expression *ir,
403              int mul_operand);
404   bool try_emit_sat(ir_expression *ir);
405
406   void emit_swz(ir_expression *ir);
407
408   bool process_move_condition(ir_rvalue *ir);
409
410   void remove_output_reads(gl_register_file type);
411   void simplify_cmp(void);
412
413   void rename_temp_register(int index, int new_index);
414   int get_first_temp_read(int index);
415   int get_first_temp_write(int index);
416   int get_last_temp_read(int index);
417   int get_last_temp_write(int index);
418
419   void copy_propagate(void);
420   void eliminate_dead_code(void);
421   int eliminate_dead_code_advanced(void);
422   void merge_registers(void);
423   void renumber_registers(void);
424
425   void *mem_ctx;
426};
427
428static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
429
430static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
431
432static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
433
434static void
435fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
436
437static void
438fail_link(struct gl_shader_program *prog, const char *fmt, ...)
439{
440   va_list args;
441   va_start(args, fmt);
442   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
443   va_end(args);
444
445   prog->LinkStatus = GL_FALSE;
446}
447
448static int
449swizzle_for_size(int size)
450{
451   int size_swizzles[4] = {
452      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
453      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
454      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
455      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
456   };
457
458   assert((size >= 1) && (size <= 4));
459   return size_swizzles[size - 1];
460}
461
462static bool
463is_tex_instruction(unsigned opcode)
464{
465   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
466   return info->is_tex;
467}
468
469static unsigned
470num_inst_dst_regs(unsigned opcode)
471{
472   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
473   return info->num_dst;
474}
475
476static unsigned
477num_inst_src_regs(unsigned opcode)
478{
479   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
480   return info->is_tex ? info->num_src - 1 : info->num_src;
481}
482
483glsl_to_tgsi_instruction *
484glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
485        		 st_dst_reg dst,
486        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
487{
488   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
489   int num_reladdr = 0, i;
490
491   op = get_opcode(ir, op, dst, src0, src1);
492
493   /* If we have to do relative addressing, we want to load the ARL
494    * reg directly for one of the regs, and preload the other reladdr
495    * sources into temps.
496    */
497   num_reladdr += dst.reladdr != NULL;
498   num_reladdr += src0.reladdr != NULL;
499   num_reladdr += src1.reladdr != NULL;
500   num_reladdr += src2.reladdr != NULL;
501
502   reladdr_to_temp(ir, &src2, &num_reladdr);
503   reladdr_to_temp(ir, &src1, &num_reladdr);
504   reladdr_to_temp(ir, &src0, &num_reladdr);
505
506   if (dst.reladdr) {
507      emit_arl(ir, address_reg, *dst.reladdr);
508      num_reladdr--;
509   }
510   assert(num_reladdr == 0);
511
512   inst->op = op;
513   inst->dst = dst;
514   inst->src[0] = src0;
515   inst->src[1] = src1;
516   inst->src[2] = src2;
517   inst->ir = ir;
518   inst->dead_mask = 0;
519
520   inst->function = NULL;
521
522   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
523      this->num_address_regs = 1;
524
525   /* Update indirect addressing status used by TGSI */
526   if (dst.reladdr) {
527      switch(dst.file) {
528      case PROGRAM_TEMPORARY:
529         this->indirect_addr_temps = true;
530         break;
531      case PROGRAM_LOCAL_PARAM:
532      case PROGRAM_ENV_PARAM:
533      case PROGRAM_STATE_VAR:
534      case PROGRAM_NAMED_PARAM:
535      case PROGRAM_CONSTANT:
536      case PROGRAM_UNIFORM:
537         this->indirect_addr_consts = true;
538         break;
539      case PROGRAM_IMMEDIATE:
540         assert(!"immediates should not have indirect addressing");
541         break;
542      default:
543         break;
544      }
545   }
546   else {
547      for (i=0; i<3; i++) {
548         if(inst->src[i].reladdr) {
549            switch(inst->src[i].file) {
550            case PROGRAM_TEMPORARY:
551               this->indirect_addr_temps = true;
552               break;
553            case PROGRAM_LOCAL_PARAM:
554            case PROGRAM_ENV_PARAM:
555            case PROGRAM_STATE_VAR:
556            case PROGRAM_NAMED_PARAM:
557            case PROGRAM_CONSTANT:
558            case PROGRAM_UNIFORM:
559               this->indirect_addr_consts = true;
560               break;
561            case PROGRAM_IMMEDIATE:
562               assert(!"immediates should not have indirect addressing");
563               break;
564            default:
565               break;
566            }
567         }
568      }
569   }
570
571   this->instructions.push_tail(inst);
572
573   if (native_integers)
574      try_emit_float_set(ir, op, dst);
575
576   return inst;
577}
578
579
580glsl_to_tgsi_instruction *
581glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
582        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
583{
584   return emit(ir, op, dst, src0, src1, undef_src);
585}
586
587glsl_to_tgsi_instruction *
588glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
589        		 st_dst_reg dst, st_src_reg src0)
590{
591   assert(dst.writemask != 0);
592   return emit(ir, op, dst, src0, undef_src, undef_src);
593}
594
595glsl_to_tgsi_instruction *
596glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
597{
598   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
599}
600
601 /**
602 * Emits the code to convert the result of float SET instructions to integers.
603 */
604void
605glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
606        		 st_dst_reg dst)
607{
608   if ((op == TGSI_OPCODE_SEQ ||
609        op == TGSI_OPCODE_SNE ||
610        op == TGSI_OPCODE_SGE ||
611        op == TGSI_OPCODE_SLT))
612   {
613      st_src_reg src = st_src_reg(dst);
614      src.negate = ~src.negate;
615      dst.type = GLSL_TYPE_FLOAT;
616      emit(ir, TGSI_OPCODE_F2I, dst, src);
617   }
618}
619
620/**
621 * Determines whether to use an integer, unsigned integer, or float opcode
622 * based on the operands and input opcode, then emits the result.
623 */
624unsigned
625glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
626        		 st_dst_reg dst,
627        		 st_src_reg src0, st_src_reg src1)
628{
629   int type = GLSL_TYPE_FLOAT;
630
631   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
632      type = GLSL_TYPE_FLOAT;
633   else if (native_integers)
634      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
635
636#define case4(c, f, i, u) \
637   case TGSI_OPCODE_##c: \
638      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
639      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
640      else op = TGSI_OPCODE_##f; \
641      break;
642#define case3(f, i, u)  case4(f, f, i, u)
643#define case2fi(f, i)   case4(f, f, i, i)
644#define case2iu(i, u)   case4(i, LAST, i, u)
645
646   switch(op) {
647      case2fi(ADD, UADD);
648      case2fi(MUL, UMUL);
649      case2fi(MAD, UMAD);
650      case3(DIV, IDIV, UDIV);
651      case3(MAX, IMAX, UMAX);
652      case3(MIN, IMIN, UMIN);
653      case2iu(MOD, UMOD);
654
655      case2fi(SEQ, USEQ);
656      case2fi(SNE, USNE);
657      case3(SGE, ISGE, USGE);
658      case3(SLT, ISLT, USLT);
659
660      case2iu(ISHR, USHR);
661
662      default: break;
663   }
664
665   assert(op != TGSI_OPCODE_LAST);
666   return op;
667}
668
669glsl_to_tgsi_instruction *
670glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
671        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
672        		    unsigned elements)
673{
674   static const unsigned dot_opcodes[] = {
675      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
676   };
677
678   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
679}
680
681/**
682 * Emits TGSI scalar opcodes to produce unique answers across channels.
683 *
684 * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
685 * channel determines the result across all channels.  So to do a vec4
686 * of this operation, we want to emit a scalar per source channel used
687 * to produce dest channels.
688 */
689void
690glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
691        		        st_dst_reg dst,
692        			st_src_reg orig_src0, st_src_reg orig_src1)
693{
694   int i, j;
695   int done_mask = ~dst.writemask;
696
697   /* TGSI RCP is a scalar operation splatting results to all channels,
698    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
699    * dst channels.
700    */
701   for (i = 0; i < 4; i++) {
702      GLuint this_mask = (1 << i);
703      glsl_to_tgsi_instruction *inst;
704      st_src_reg src0 = orig_src0;
705      st_src_reg src1 = orig_src1;
706
707      if (done_mask & this_mask)
708         continue;
709
710      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
711      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
712      for (j = i + 1; j < 4; j++) {
713         /* If there is another enabled component in the destination that is
714          * derived from the same inputs, generate its value on this pass as
715          * well.
716          */
717         if (!(done_mask & (1 << j)) &&
718             GET_SWZ(src0.swizzle, j) == src0_swiz &&
719             GET_SWZ(src1.swizzle, j) == src1_swiz) {
720            this_mask |= (1 << j);
721         }
722      }
723      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
724        			   src0_swiz, src0_swiz);
725      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
726        			  src1_swiz, src1_swiz);
727
728      inst = emit(ir, op, dst, src0, src1);
729      inst->dst.writemask = this_mask;
730      done_mask |= this_mask;
731   }
732}
733
734void
735glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
736        		        st_dst_reg dst, st_src_reg src0)
737{
738   st_src_reg undef = undef_src;
739
740   undef.swizzle = SWIZZLE_XXXX;
741
742   emit_scalar(ir, op, dst, src0, undef);
743}
744
745void
746glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
747        		        st_dst_reg dst, st_src_reg src0)
748{
749   int op = TGSI_OPCODE_ARL;
750
751   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
752      op = TGSI_OPCODE_UARL;
753
754   emit(NULL, op, dst, src0);
755}
756
757/**
758 * Emit an TGSI_OPCODE_SCS instruction
759 *
760 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
761 * Instead of splatting its result across all four components of the
762 * destination, it writes one value to the \c x component and another value to
763 * the \c y component.
764 *
765 * \param ir        IR instruction being processed
766 * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
767 *                  on which value is desired.
768 * \param dst       Destination register
769 * \param src       Source register
770 */
771void
772glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
773        		     st_dst_reg dst,
774        		     const st_src_reg &src)
775{
776   /* Vertex programs cannot use the SCS opcode.
777    */
778   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
779      emit_scalar(ir, op, dst, src);
780      return;
781   }
782
783   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
784   const unsigned scs_mask = (1U << component);
785   int done_mask = ~dst.writemask;
786   st_src_reg tmp;
787
788   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
789
790   /* If there are compnents in the destination that differ from the component
791    * that will be written by the SCS instrution, we'll need a temporary.
792    */
793   if (scs_mask != unsigned(dst.writemask)) {
794      tmp = get_temp(glsl_type::vec4_type);
795   }
796
797   for (unsigned i = 0; i < 4; i++) {
798      unsigned this_mask = (1U << i);
799      st_src_reg src0 = src;
800
801      if ((done_mask & this_mask) != 0)
802         continue;
803
804      /* The source swizzle specified which component of the source generates
805       * sine / cosine for the current component in the destination.  The SCS
806       * instruction requires that this value be swizzle to the X component.
807       * Replace the current swizzle with a swizzle that puts the source in
808       * the X component.
809       */
810      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
811
812      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
813        			   src0_swiz, src0_swiz);
814      for (unsigned j = i + 1; j < 4; j++) {
815         /* If there is another enabled component in the destination that is
816          * derived from the same inputs, generate its value on this pass as
817          * well.
818          */
819         if (!(done_mask & (1 << j)) &&
820             GET_SWZ(src0.swizzle, j) == src0_swiz) {
821            this_mask |= (1 << j);
822         }
823      }
824
825      if (this_mask != scs_mask) {
826         glsl_to_tgsi_instruction *inst;
827         st_dst_reg tmp_dst = st_dst_reg(tmp);
828
829         /* Emit the SCS instruction.
830          */
831         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
832         inst->dst.writemask = scs_mask;
833
834         /* Move the result of the SCS instruction to the desired location in
835          * the destination.
836          */
837         tmp.swizzle = MAKE_SWIZZLE4(component, component,
838        			     component, component);
839         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
840         inst->dst.writemask = this_mask;
841      } else {
842         /* Emit the SCS instruction to write directly to the destination.
843          */
844         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
845         inst->dst.writemask = scs_mask;
846      }
847
848      done_mask |= this_mask;
849   }
850}
851
852int
853glsl_to_tgsi_visitor::add_constant(gl_register_file file,
854        		     gl_constant_value values[4], int size, int datatype,
855        		     GLuint *swizzle_out)
856{
857   if (file == PROGRAM_CONSTANT) {
858      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
859                                              size, datatype, swizzle_out);
860   } else {
861      int index = 0;
862      immediate_storage *entry;
863      assert(file == PROGRAM_IMMEDIATE);
864
865      /* Search immediate storage to see if we already have an identical
866       * immediate that we can use instead of adding a duplicate entry.
867       */
868      foreach_iter(exec_list_iterator, iter, this->immediates) {
869         entry = (immediate_storage *)iter.get();
870
871         if (entry->size == size &&
872             entry->type == datatype &&
873             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
874             return index;
875         }
876         index++;
877      }
878
879      /* Add this immediate to the list. */
880      entry = new(mem_ctx) immediate_storage(values, size, datatype);
881      this->immediates.push_tail(entry);
882      this->num_immediates++;
883      return index;
884   }
885}
886
887st_src_reg
888glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
889{
890   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
891   union gl_constant_value uval;
892
893   uval.f = val;
894   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
895
896   return src;
897}
898
899st_src_reg
900glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
901{
902   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
903   union gl_constant_value uval;
904
905   assert(native_integers);
906
907   uval.i = val;
908   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
909
910   return src;
911}
912
913st_src_reg
914glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
915{
916   if (native_integers)
917      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
918                                       st_src_reg_for_int(val);
919   else
920      return st_src_reg_for_float(val);
921}
922
923static int
924type_size(const struct glsl_type *type)
925{
926   unsigned int i;
927   int size;
928
929   switch (type->base_type) {
930   case GLSL_TYPE_UINT:
931   case GLSL_TYPE_INT:
932   case GLSL_TYPE_FLOAT:
933   case GLSL_TYPE_BOOL:
934      if (type->is_matrix()) {
935         return type->matrix_columns;
936      } else {
937         /* Regardless of size of vector, it gets a vec4. This is bad
938          * packing for things like floats, but otherwise arrays become a
939          * mess.  Hopefully a later pass over the code can pack scalars
940          * down if appropriate.
941          */
942         return 1;
943      }
944   case GLSL_TYPE_ARRAY:
945      assert(type->length > 0);
946      return type_size(type->fields.array) * type->length;
947   case GLSL_TYPE_STRUCT:
948      size = 0;
949      for (i = 0; i < type->length; i++) {
950         size += type_size(type->fields.structure[i].type);
951      }
952      return size;
953   case GLSL_TYPE_SAMPLER:
954      /* Samplers take up one slot in UNIFORMS[], but they're baked in
955       * at link time.
956       */
957      return 1;
958   default:
959      assert(0);
960      return 0;
961   }
962}
963
964/**
965 * In the initial pass of codegen, we assign temporary numbers to
966 * intermediate results.  (not SSA -- variable assignments will reuse
967 * storage).
968 */
969st_src_reg
970glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
971{
972   st_src_reg src;
973
974   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
975   src.file = PROGRAM_TEMPORARY;
976   src.index = next_temp;
977   src.reladdr = NULL;
978   next_temp += type_size(type);
979
980   if (type->is_array() || type->is_record()) {
981      src.swizzle = SWIZZLE_NOOP;
982   } else {
983      src.swizzle = swizzle_for_size(type->vector_elements);
984   }
985   src.negate = 0;
986
987   return src;
988}
989
990variable_storage *
991glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
992{
993
994   variable_storage *entry;
995
996   foreach_iter(exec_list_iterator, iter, this->variables) {
997      entry = (variable_storage *)iter.get();
998
999      if (entry->var == var)
1000         return entry;
1001   }
1002
1003   return NULL;
1004}
1005
1006void
1007glsl_to_tgsi_visitor::visit(ir_variable *ir)
1008{
1009   if (strcmp(ir->name, "gl_FragCoord") == 0) {
1010      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1011
1012      fp->OriginUpperLeft = ir->origin_upper_left;
1013      fp->PixelCenterInteger = ir->pixel_center_integer;
1014
1015   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
1016      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1017      switch (ir->depth_layout) {
1018      case ir_depth_layout_none:
1019         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
1020         break;
1021      case ir_depth_layout_any:
1022         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
1023         break;
1024      case ir_depth_layout_greater:
1025         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
1026         break;
1027      case ir_depth_layout_less:
1028         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
1029         break;
1030      case ir_depth_layout_unchanged:
1031         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
1032         break;
1033      default:
1034         assert(0);
1035         break;
1036      }
1037   }
1038
1039   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1040      unsigned int i;
1041      const ir_state_slot *const slots = ir->state_slots;
1042      assert(ir->state_slots != NULL);
1043
1044      /* Check if this statevar's setup in the STATE file exactly
1045       * matches how we'll want to reference it as a
1046       * struct/array/whatever.  If not, then we need to move it into
1047       * temporary storage and hope that it'll get copy-propagated
1048       * out.
1049       */
1050      for (i = 0; i < ir->num_state_slots; i++) {
1051         if (slots[i].swizzle != SWIZZLE_XYZW) {
1052            break;
1053         }
1054      }
1055
1056      variable_storage *storage;
1057      st_dst_reg dst;
1058      if (i == ir->num_state_slots) {
1059         /* We'll set the index later. */
1060         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1061         this->variables.push_tail(storage);
1062
1063         dst = undef_dst;
1064      } else {
1065         /* The variable_storage constructor allocates slots based on the size
1066          * of the type.  However, this had better match the number of state
1067          * elements that we're going to copy into the new temporary.
1068          */
1069         assert((int) ir->num_state_slots == type_size(ir->type));
1070
1071         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
1072        					 this->next_temp);
1073         this->variables.push_tail(storage);
1074         this->next_temp += type_size(ir->type);
1075
1076         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
1077               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
1078      }
1079
1080
1081      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1082         int index = _mesa_add_state_reference(this->prog->Parameters,
1083        				       (gl_state_index *)slots[i].tokens);
1084
1085         if (storage->file == PROGRAM_STATE_VAR) {
1086            if (storage->index == -1) {
1087               storage->index = index;
1088            } else {
1089               assert(index == storage->index + (int)i);
1090            }
1091         } else {
1092            st_src_reg src(PROGRAM_STATE_VAR, index,
1093                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
1094            src.swizzle = slots[i].swizzle;
1095            emit(ir, TGSI_OPCODE_MOV, dst, src);
1096            /* even a float takes up a whole vec4 reg in a struct/array. */
1097            dst.index++;
1098         }
1099      }
1100
1101      if (storage->file == PROGRAM_TEMPORARY &&
1102          dst.index != storage->index + (int) ir->num_state_slots) {
1103         fail_link(this->shader_program,
1104        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
1105        	   ir->name, dst.index - storage->index,
1106        	   type_size(ir->type));
1107      }
1108   }
1109}
1110
1111void
1112glsl_to_tgsi_visitor::visit(ir_loop *ir)
1113{
1114   ir_dereference_variable *counter = NULL;
1115
1116   if (ir->counter != NULL)
1117      counter = new(ir) ir_dereference_variable(ir->counter);
1118
1119   if (ir->from != NULL) {
1120      assert(ir->counter != NULL);
1121
1122      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1123
1124      a->accept(this);
1125      delete a;
1126   }
1127
1128   emit(NULL, TGSI_OPCODE_BGNLOOP);
1129
1130   if (ir->to) {
1131      ir_expression *e =
1132         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1133        		       counter, ir->to);
1134      ir_if *if_stmt =  new(ir) ir_if(e);
1135
1136      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1137
1138      if_stmt->then_instructions.push_tail(brk);
1139
1140      if_stmt->accept(this);
1141
1142      delete if_stmt;
1143      delete e;
1144      delete brk;
1145   }
1146
1147   visit_exec_list(&ir->body_instructions, this);
1148
1149   if (ir->increment) {
1150      ir_expression *e =
1151         new(ir) ir_expression(ir_binop_add, counter->type,
1152        		       counter, ir->increment);
1153
1154      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1155
1156      a->accept(this);
1157      delete a;
1158      delete e;
1159   }
1160
1161   emit(NULL, TGSI_OPCODE_ENDLOOP);
1162}
1163
1164void
1165glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1166{
1167   switch (ir->mode) {
1168   case ir_loop_jump::jump_break:
1169      emit(NULL, TGSI_OPCODE_BRK);
1170      break;
1171   case ir_loop_jump::jump_continue:
1172      emit(NULL, TGSI_OPCODE_CONT);
1173      break;
1174   }
1175}
1176
1177
1178void
1179glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1180{
1181   assert(0);
1182   (void)ir;
1183}
1184
1185void
1186glsl_to_tgsi_visitor::visit(ir_function *ir)
1187{
1188   /* Ignore function bodies other than main() -- we shouldn't see calls to
1189    * them since they should all be inlined before we get to glsl_to_tgsi.
1190    */
1191   if (strcmp(ir->name, "main") == 0) {
1192      const ir_function_signature *sig;
1193      exec_list empty;
1194
1195      sig = ir->matching_signature(&empty);
1196
1197      assert(sig);
1198
1199      foreach_iter(exec_list_iterator, iter, sig->body) {
1200         ir_instruction *ir = (ir_instruction *)iter.get();
1201
1202         ir->accept(this);
1203      }
1204   }
1205}
1206
1207bool
1208glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1209{
1210   int nonmul_operand = 1 - mul_operand;
1211   st_src_reg a, b, c;
1212   st_dst_reg result_dst;
1213
1214   ir_expression *expr = ir->operands[mul_operand]->as_expression();
1215   if (!expr || expr->operation != ir_binop_mul)
1216      return false;
1217
1218   expr->operands[0]->accept(this);
1219   a = this->result;
1220   expr->operands[1]->accept(this);
1221   b = this->result;
1222   ir->operands[nonmul_operand]->accept(this);
1223   c = this->result;
1224
1225   this->result = get_temp(ir->type);
1226   result_dst = st_dst_reg(this->result);
1227   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1228   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1229
1230   return true;
1231}
1232
1233/**
1234 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1235 *
1236 * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
1237 * implemented using multiplication, and logical-or is implemented using
1238 * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
1239 * As result, the logical expression (a & !b) can be rewritten as:
1240 *
1241 *     - a * !b
1242 *     - a * (1 - b)
1243 *     - (a * 1) - (a * b)
1244 *     - a + -(a * b)
1245 *     - a + (a * -b)
1246 *
1247 * This final expression can be implemented as a single MAD(a, -b, a)
1248 * instruction.
1249 */
1250bool
1251glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1252{
1253   const int other_operand = 1 - try_operand;
1254   st_src_reg a, b;
1255
1256   ir_expression *expr = ir->operands[try_operand]->as_expression();
1257   if (!expr || expr->operation != ir_unop_logic_not)
1258      return false;
1259
1260   ir->operands[other_operand]->accept(this);
1261   a = this->result;
1262   expr->operands[0]->accept(this);
1263   b = this->result;
1264
1265   b.negate = ~b.negate;
1266
1267   this->result = get_temp(ir->type);
1268   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1269
1270   return true;
1271}
1272
1273bool
1274glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1275{
1276   /* Saturates were only introduced to vertex programs in
1277    * NV_vertex_program3, so don't give them to drivers in the VP.
1278    */
1279   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1280      return false;
1281
1282   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1283   if (!sat_src)
1284      return false;
1285
1286   sat_src->accept(this);
1287   st_src_reg src = this->result;
1288
1289   /* If we generated an expression instruction into a temporary in
1290    * processing the saturate's operand, apply the saturate to that
1291    * instruction.  Otherwise, generate a MOV to do the saturate.
1292    *
1293    * Note that we have to be careful to only do this optimization if
1294    * the instruction in question was what generated src->result.  For
1295    * example, ir_dereference_array might generate a MUL instruction
1296    * to create the reladdr, and return us a src reg using that
1297    * reladdr.  That MUL result is not the value we're trying to
1298    * saturate.
1299    */
1300   ir_expression *sat_src_expr = sat_src->as_expression();
1301   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
1302			sat_src_expr->operation == ir_binop_add ||
1303			sat_src_expr->operation == ir_binop_dot)) {
1304      glsl_to_tgsi_instruction *new_inst;
1305      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1306      new_inst->saturate = true;
1307   } else {
1308      this->result = get_temp(ir->type);
1309      st_dst_reg result_dst = st_dst_reg(this->result);
1310      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1311      glsl_to_tgsi_instruction *inst;
1312      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1313      inst->saturate = true;
1314   }
1315
1316   return true;
1317}
1318
1319void
1320glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1321        			    st_src_reg *reg, int *num_reladdr)
1322{
1323   if (!reg->reladdr)
1324      return;
1325
1326   emit_arl(ir, address_reg, *reg->reladdr);
1327
1328   if (*num_reladdr != 1) {
1329      st_src_reg temp = get_temp(glsl_type::vec4_type);
1330
1331      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1332      *reg = temp;
1333   }
1334
1335   (*num_reladdr)--;
1336}
1337
1338void
1339glsl_to_tgsi_visitor::visit(ir_expression *ir)
1340{
1341   unsigned int operand;
1342   st_src_reg op[Elements(ir->operands)];
1343   st_src_reg result_src;
1344   st_dst_reg result_dst;
1345
1346   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1347    */
1348   if (ir->operation == ir_binop_add) {
1349      if (try_emit_mad(ir, 1))
1350         return;
1351      if (try_emit_mad(ir, 0))
1352         return;
1353   }
1354
1355   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1356    */
1357   if (ir->operation == ir_binop_logic_and) {
1358      if (try_emit_mad_for_and_not(ir, 1))
1359	 return;
1360      if (try_emit_mad_for_and_not(ir, 0))
1361	 return;
1362   }
1363
1364   if (try_emit_sat(ir))
1365      return;
1366
1367   if (ir->operation == ir_quadop_vector)
1368      assert(!"ir_quadop_vector should have been lowered");
1369
1370   for (operand = 0; operand < ir->get_num_operands(); operand++) {
1371      this->result.file = PROGRAM_UNDEFINED;
1372      ir->operands[operand]->accept(this);
1373      if (this->result.file == PROGRAM_UNDEFINED) {
1374         ir_print_visitor v;
1375         printf("Failed to get tree for expression operand:\n");
1376         ir->operands[operand]->accept(&v);
1377         exit(1);
1378      }
1379      op[operand] = this->result;
1380
1381      /* Matrix expression operands should have been broken down to vector
1382       * operations already.
1383       */
1384      assert(!ir->operands[operand]->type->is_matrix());
1385   }
1386
1387   int vector_elements = ir->operands[0]->type->vector_elements;
1388   if (ir->operands[1]) {
1389      vector_elements = MAX2(vector_elements,
1390        		     ir->operands[1]->type->vector_elements);
1391   }
1392
1393   this->result.file = PROGRAM_UNDEFINED;
1394
1395   /* Storage for our result.  Ideally for an assignment we'd be using
1396    * the actual storage for the result here, instead.
1397    */
1398   result_src = get_temp(ir->type);
1399   /* convenience for the emit functions below. */
1400   result_dst = st_dst_reg(result_src);
1401   /* Limit writes to the channels that will be used by result_src later.
1402    * This does limit this temp's use as a temporary for multi-instruction
1403    * sequences.
1404    */
1405   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1406
1407   switch (ir->operation) {
1408   case ir_unop_logic_not:
1409      if (result_dst.type != GLSL_TYPE_FLOAT)
1410         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1411      else {
1412         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1413          * older GPUs implement SEQ using multiple instructions (i915 uses two
1414          * SGE instructions and a MUL instruction).  Since our logic values are
1415          * 0.0 and 1.0, 1-x also implements !x.
1416          */
1417         op[0].negate = ~op[0].negate;
1418         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1419      }
1420      break;
1421   case ir_unop_neg:
1422      assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
1423      if (result_dst.type == GLSL_TYPE_INT)
1424         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1425      else {
1426         op[0].negate = ~op[0].negate;
1427         result_src = op[0];
1428      }
1429      break;
1430   case ir_unop_abs:
1431      assert(result_dst.type == GLSL_TYPE_FLOAT);
1432      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1433      break;
1434   case ir_unop_sign:
1435      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1436      break;
1437   case ir_unop_rcp:
1438      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1439      break;
1440
1441   case ir_unop_exp2:
1442      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1443      break;
1444   case ir_unop_exp:
1445   case ir_unop_log:
1446      assert(!"not reached: should be handled by ir_explog_to_explog2");
1447      break;
1448   case ir_unop_log2:
1449      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1450      break;
1451   case ir_unop_sin:
1452      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1453      break;
1454   case ir_unop_cos:
1455      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1456      break;
1457   case ir_unop_sin_reduced:
1458      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1459      break;
1460   case ir_unop_cos_reduced:
1461      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1462      break;
1463
1464   case ir_unop_dFdx:
1465      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1466      break;
1467   case ir_unop_dFdy:
1468      op[0].negate = ~op[0].negate;
1469      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1470      break;
1471
1472   case ir_unop_noise: {
1473      /* At some point, a motivated person could add a better
1474       * implementation of noise.  Currently not even the nvidia
1475       * binary drivers do anything more than this.  In any case, the
1476       * place to do this is in the GL state tracker, not the poor
1477       * driver.
1478       */
1479      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1480      break;
1481   }
1482
1483   case ir_binop_add:
1484      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1485      break;
1486   case ir_binop_sub:
1487      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1488      break;
1489
1490   case ir_binop_mul:
1491      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1492      break;
1493   case ir_binop_div:
1494      if (result_dst.type == GLSL_TYPE_FLOAT)
1495         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1496      else
1497         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1498      break;
1499   case ir_binop_mod:
1500      if (result_dst.type == GLSL_TYPE_FLOAT)
1501         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1502      else
1503         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1504      break;
1505
1506   case ir_binop_less:
1507      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1508      break;
1509   case ir_binop_greater:
1510      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1511      break;
1512   case ir_binop_lequal:
1513      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1514      break;
1515   case ir_binop_gequal:
1516      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1517      break;
1518   case ir_binop_equal:
1519      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1520      break;
1521   case ir_binop_nequal:
1522      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1523      break;
1524   case ir_binop_all_equal:
1525      /* "==" operator producing a scalar boolean. */
1526      if (ir->operands[0]->type->is_vector() ||
1527          ir->operands[1]->type->is_vector()) {
1528         st_src_reg temp = get_temp(native_integers ?
1529               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1530               glsl_type::vec4_type);
1531
1532         if (native_integers) {
1533            st_dst_reg temp_dst = st_dst_reg(temp);
1534            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1535
1536            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1537
1538            /* Emit 1-3 AND operations to combine the SEQ results. */
1539            switch (ir->operands[0]->type->vector_elements) {
1540            case 2:
1541               break;
1542            case 3:
1543               temp_dst.writemask = WRITEMASK_Y;
1544               temp1.swizzle = SWIZZLE_YYYY;
1545               temp2.swizzle = SWIZZLE_ZZZZ;
1546               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1547               break;
1548            case 4:
1549               temp_dst.writemask = WRITEMASK_X;
1550               temp1.swizzle = SWIZZLE_XXXX;
1551               temp2.swizzle = SWIZZLE_YYYY;
1552               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1553               temp_dst.writemask = WRITEMASK_Y;
1554               temp1.swizzle = SWIZZLE_ZZZZ;
1555               temp2.swizzle = SWIZZLE_WWWW;
1556               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1557            }
1558
1559            temp1.swizzle = SWIZZLE_XXXX;
1560            temp2.swizzle = SWIZZLE_YYYY;
1561            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1562         } else {
1563            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1564
1565            /* After the dot-product, the value will be an integer on the
1566             * range [0,4].  Zero becomes 1.0, and positive values become zero.
1567             */
1568            emit_dp(ir, result_dst, temp, temp, vector_elements);
1569
1570            /* Negating the result of the dot-product gives values on the range
1571             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
1572             * This is achieved using SGE.
1573             */
1574            st_src_reg sge_src = result_src;
1575            sge_src.negate = ~sge_src.negate;
1576            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1577         }
1578      } else {
1579         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1580      }
1581      break;
1582   case ir_binop_any_nequal:
1583      /* "!=" operator producing a scalar boolean. */
1584      if (ir->operands[0]->type->is_vector() ||
1585          ir->operands[1]->type->is_vector()) {
1586         st_src_reg temp = get_temp(native_integers ?
1587               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1588               glsl_type::vec4_type);
1589         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1590
1591         if (native_integers) {
1592            st_dst_reg temp_dst = st_dst_reg(temp);
1593            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1594
1595            /* Emit 1-3 OR operations to combine the SNE results. */
1596            switch (ir->operands[0]->type->vector_elements) {
1597            case 2:
1598               break;
1599            case 3:
1600               temp_dst.writemask = WRITEMASK_Y;
1601               temp1.swizzle = SWIZZLE_YYYY;
1602               temp2.swizzle = SWIZZLE_ZZZZ;
1603               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1604               break;
1605            case 4:
1606               temp_dst.writemask = WRITEMASK_X;
1607               temp1.swizzle = SWIZZLE_XXXX;
1608               temp2.swizzle = SWIZZLE_YYYY;
1609               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1610               temp_dst.writemask = WRITEMASK_Y;
1611               temp1.swizzle = SWIZZLE_ZZZZ;
1612               temp2.swizzle = SWIZZLE_WWWW;
1613               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1614            }
1615
1616            temp1.swizzle = SWIZZLE_XXXX;
1617            temp2.swizzle = SWIZZLE_YYYY;
1618            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1619         } else {
1620            /* After the dot-product, the value will be an integer on the
1621             * range [0,4].  Zero stays zero, and positive values become 1.0.
1622             */
1623            glsl_to_tgsi_instruction *const dp =
1624                  emit_dp(ir, result_dst, temp, temp, vector_elements);
1625            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1626               /* The clamping to [0,1] can be done for free in the fragment
1627                * shader with a saturate.
1628                */
1629               dp->saturate = true;
1630            } else {
1631               /* Negating the result of the dot-product gives values on the range
1632                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1633                * achieved using SLT.
1634                */
1635               st_src_reg slt_src = result_src;
1636               slt_src.negate = ~slt_src.negate;
1637               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1638            }
1639         }
1640      } else {
1641         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1642      }
1643      break;
1644
1645   case ir_unop_any: {
1646      assert(ir->operands[0]->type->is_vector());
1647
1648      /* After the dot-product, the value will be an integer on the
1649       * range [0,4].  Zero stays zero, and positive values become 1.0.
1650       */
1651      glsl_to_tgsi_instruction *const dp =
1652         emit_dp(ir, result_dst, op[0], op[0],
1653                 ir->operands[0]->type->vector_elements);
1654      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1655          result_dst.type == GLSL_TYPE_FLOAT) {
1656	      /* The clamping to [0,1] can be done for free in the fragment
1657	       * shader with a saturate.
1658	       */
1659	      dp->saturate = true;
1660      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1661	      /* Negating the result of the dot-product gives values on the range
1662	       * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1663	       * is achieved using SLT.
1664	       */
1665	      st_src_reg slt_src = result_src;
1666	      slt_src.negate = ~slt_src.negate;
1667	      emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1668      }
1669      else {
1670         /* Use SNE 0 if integers are being used as boolean values. */
1671         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1672      }
1673      break;
1674   }
1675
1676   case ir_binop_logic_xor:
1677      if (native_integers)
1678         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1679      else
1680         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1681      break;
1682
1683   case ir_binop_logic_or: {
1684      if (native_integers) {
1685         /* If integers are used as booleans, we can use an actual "or"
1686          * instruction.
1687          */
1688         assert(native_integers);
1689         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1690      } else {
1691         /* After the addition, the value will be an integer on the
1692          * range [0,2].  Zero stays zero, and positive values become 1.0.
1693          */
1694         glsl_to_tgsi_instruction *add =
1695            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1696         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1697            /* The clamping to [0,1] can be done for free in the fragment
1698             * shader with a saturate if floats are being used as boolean values.
1699             */
1700            add->saturate = true;
1701         } else {
1702            /* Negating the result of the addition gives values on the range
1703             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1704             * is achieved using SLT.
1705             */
1706            st_src_reg slt_src = result_src;
1707            slt_src.negate = ~slt_src.negate;
1708            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1709         }
1710      }
1711      break;
1712   }
1713
1714   case ir_binop_logic_and:
1715      /* If native integers are disabled, the bool args are stored as float 0.0
1716       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
1717       * actual AND opcode.
1718       */
1719      if (native_integers)
1720         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1721      else
1722         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1723      break;
1724
1725   case ir_binop_dot:
1726      assert(ir->operands[0]->type->is_vector());
1727      assert(ir->operands[0]->type == ir->operands[1]->type);
1728      emit_dp(ir, result_dst, op[0], op[1],
1729              ir->operands[0]->type->vector_elements);
1730      break;
1731
1732   case ir_unop_sqrt:
1733      /* sqrt(x) = x * rsq(x). */
1734      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1735      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1736      /* For incoming channels <= 0, set the result to 0. */
1737      op[0].negate = ~op[0].negate;
1738      emit(ir, TGSI_OPCODE_CMP, result_dst,
1739        		  op[0], result_src, st_src_reg_for_float(0.0));
1740      break;
1741   case ir_unop_rsq:
1742      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1743      break;
1744   case ir_unop_i2f:
1745      if (native_integers) {
1746         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1747         break;
1748      }
1749      /* fallthrough to next case otherwise */
1750   case ir_unop_b2f:
1751      if (native_integers) {
1752         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1753         break;
1754      }
1755      /* fallthrough to next case otherwise */
1756   case ir_unop_i2u:
1757   case ir_unop_u2i:
1758      /* Converting between signed and unsigned integers is a no-op. */
1759      result_src = op[0];
1760      break;
1761   case ir_unop_b2i:
1762      if (native_integers) {
1763         /* Booleans are stored as integers using ~0 for true and 0 for false.
1764          * GLSL requires that int(bool) return 1 for true and 0 for false.
1765          * This conversion is done with AND, but it could be done with NEG.
1766          */
1767         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1768      } else {
1769         /* Booleans and integers are both stored as floats when native
1770          * integers are disabled.
1771          */
1772         result_src = op[0];
1773      }
1774      break;
1775   case ir_unop_f2i:
1776      if (native_integers)
1777         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1778      else
1779         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1780      break;
1781   case ir_unop_f2b:
1782      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1783      break;
1784   case ir_unop_i2b:
1785      if (native_integers)
1786         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1787      else
1788         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1789      break;
1790   case ir_unop_trunc:
1791      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1792      break;
1793   case ir_unop_ceil:
1794      op[0].negate = ~op[0].negate;
1795      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1796      result_src.negate = ~result_src.negate;
1797      break;
1798   case ir_unop_floor:
1799      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1800      break;
1801   case ir_unop_fract:
1802      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1803      break;
1804
1805   case ir_binop_min:
1806      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1807      break;
1808   case ir_binop_max:
1809      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1810      break;
1811   case ir_binop_pow:
1812      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1813      break;
1814
1815   case ir_unop_bit_not:
1816      if (native_integers) {
1817         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1818         break;
1819      }
1820   case ir_unop_u2f:
1821      if (native_integers) {
1822         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1823         break;
1824      }
1825   case ir_binop_lshift:
1826      if (native_integers) {
1827         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
1828         break;
1829      }
1830   case ir_binop_rshift:
1831      if (native_integers) {
1832         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
1833         break;
1834      }
1835   case ir_binop_bit_and:
1836      if (native_integers) {
1837         emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
1838         break;
1839      }
1840   case ir_binop_bit_xor:
1841      if (native_integers) {
1842         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
1843         break;
1844      }
1845   case ir_binop_bit_or:
1846      if (native_integers) {
1847         emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
1848         break;
1849      }
1850   case ir_unop_round_even:
1851      assert(!"GLSL 1.30 features unsupported");
1852      break;
1853
1854   case ir_quadop_vector:
1855      /* This operation should have already been handled.
1856       */
1857      assert(!"Should not get here.");
1858      break;
1859   }
1860
1861   this->result = result_src;
1862}
1863
1864
1865void
1866glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1867{
1868   st_src_reg src;
1869   int i;
1870   int swizzle[4];
1871
1872   /* Note that this is only swizzles in expressions, not those on the left
1873    * hand side of an assignment, which do write masking.  See ir_assignment
1874    * for that.
1875    */
1876
1877   ir->val->accept(this);
1878   src = this->result;
1879   assert(src.file != PROGRAM_UNDEFINED);
1880
1881   for (i = 0; i < 4; i++) {
1882      if (i < ir->type->vector_elements) {
1883         switch (i) {
1884         case 0:
1885            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1886            break;
1887         case 1:
1888            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1889            break;
1890         case 2:
1891            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1892            break;
1893         case 3:
1894            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1895            break;
1896         }
1897      } else {
1898         /* If the type is smaller than a vec4, replicate the last
1899          * channel out.
1900          */
1901         swizzle[i] = swizzle[ir->type->vector_elements - 1];
1902      }
1903   }
1904
1905   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1906
1907   this->result = src;
1908}
1909
1910void
1911glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1912{
1913   variable_storage *entry = find_variable_storage(ir->var);
1914   ir_variable *var = ir->var;
1915
1916   if (!entry) {
1917      switch (var->mode) {
1918      case ir_var_uniform:
1919         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1920        				       var->location);
1921         this->variables.push_tail(entry);
1922         break;
1923      case ir_var_in:
1924      case ir_var_inout:
1925         /* The linker assigns locations for varyings and attributes,
1926          * including deprecated builtins (like gl_Color), user-assign
1927          * generic attributes (glBindVertexLocation), and
1928          * user-defined varyings.
1929          *
1930          * FINISHME: We would hit this path for function arguments.  Fix!
1931          */
1932         assert(var->location != -1);
1933         entry = new(mem_ctx) variable_storage(var,
1934                                               PROGRAM_INPUT,
1935                                               var->location);
1936         break;
1937      case ir_var_out:
1938         assert(var->location != -1);
1939         entry = new(mem_ctx) variable_storage(var,
1940                                               PROGRAM_OUTPUT,
1941                                               var->location);
1942         break;
1943      case ir_var_system_value:
1944         entry = new(mem_ctx) variable_storage(var,
1945                                               PROGRAM_SYSTEM_VALUE,
1946                                               var->location);
1947         break;
1948      case ir_var_auto:
1949      case ir_var_temporary:
1950         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1951        				       this->next_temp);
1952         this->variables.push_tail(entry);
1953
1954         next_temp += type_size(var->type);
1955         break;
1956      }
1957
1958      if (!entry) {
1959         printf("Failed to make storage for %s\n", var->name);
1960         exit(1);
1961      }
1962   }
1963
1964   this->result = st_src_reg(entry->file, entry->index, var->type);
1965   if (!native_integers)
1966      this->result.type = GLSL_TYPE_FLOAT;
1967}
1968
1969void
1970glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1971{
1972   ir_constant *index;
1973   st_src_reg src;
1974   int element_size = type_size(ir->type);
1975
1976   index = ir->array_index->constant_expression_value();
1977
1978   ir->array->accept(this);
1979   src = this->result;
1980
1981   if (index) {
1982      src.index += index->value.i[0] * element_size;
1983   } else {
1984      /* Variable index array dereference.  It eats the "vec4" of the
1985       * base of the array and an index that offsets the TGSI register
1986       * index.
1987       */
1988      ir->array_index->accept(this);
1989
1990      st_src_reg index_reg;
1991
1992      if (element_size == 1) {
1993         index_reg = this->result;
1994      } else {
1995         index_reg = get_temp(native_integers ?
1996                              glsl_type::int_type : glsl_type::float_type);
1997
1998         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1999              this->result, st_src_reg_for_type(index_reg.type, element_size));
2000      }
2001
2002      /* If there was already a relative address register involved, add the
2003       * new and the old together to get the new offset.
2004       */
2005      if (src.reladdr != NULL) {
2006         st_src_reg accum_reg = get_temp(native_integers ?
2007                                glsl_type::int_type : glsl_type::float_type);
2008
2009         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2010              index_reg, *src.reladdr);
2011
2012         index_reg = accum_reg;
2013      }
2014
2015      src.reladdr = ralloc(mem_ctx, st_src_reg);
2016      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2017   }
2018
2019   /* If the type is smaller than a vec4, replicate the last channel out. */
2020   if (ir->type->is_scalar() || ir->type->is_vector())
2021      src.swizzle = swizzle_for_size(ir->type->vector_elements);
2022   else
2023      src.swizzle = SWIZZLE_NOOP;
2024
2025   this->result = src;
2026}
2027
2028void
2029glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2030{
2031   unsigned int i;
2032   const glsl_type *struct_type = ir->record->type;
2033   int offset = 0;
2034
2035   ir->record->accept(this);
2036
2037   for (i = 0; i < struct_type->length; i++) {
2038      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2039         break;
2040      offset += type_size(struct_type->fields.structure[i].type);
2041   }
2042
2043   /* If the type is smaller than a vec4, replicate the last channel out. */
2044   if (ir->type->is_scalar() || ir->type->is_vector())
2045      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2046   else
2047      this->result.swizzle = SWIZZLE_NOOP;
2048
2049   this->result.index += offset;
2050}
2051
2052/**
2053 * We want to be careful in assignment setup to hit the actual storage
2054 * instead of potentially using a temporary like we might with the
2055 * ir_dereference handler.
2056 */
2057static st_dst_reg
2058get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2059{
2060   /* The LHS must be a dereference.  If the LHS is a variable indexed array
2061    * access of a vector, it must be separated into a series conditional moves
2062    * before reaching this point (see ir_vec_index_to_cond_assign).
2063    */
2064   assert(ir->as_dereference());
2065   ir_dereference_array *deref_array = ir->as_dereference_array();
2066   if (deref_array) {
2067      assert(!deref_array->array->type->is_vector());
2068   }
2069
2070   /* Use the rvalue deref handler for the most part.  We'll ignore
2071    * swizzles in it and write swizzles using writemask, though.
2072    */
2073   ir->accept(v);
2074   return st_dst_reg(v->result);
2075}
2076
2077/**
2078 * Process the condition of a conditional assignment
2079 *
2080 * Examines the condition of a conditional assignment to generate the optimal
2081 * first operand of a \c CMP instruction.  If the condition is a relational
2082 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2083 * used as the source for the \c CMP instruction.  Otherwise the comparison
2084 * is processed to a boolean result, and the boolean result is used as the
2085 * operand to the CMP instruction.
2086 */
2087bool
2088glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2089{
2090   ir_rvalue *src_ir = ir;
2091   bool negate = true;
2092   bool switch_order = false;
2093
2094   ir_expression *const expr = ir->as_expression();
2095   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2096      bool zero_on_left = false;
2097
2098      if (expr->operands[0]->is_zero()) {
2099         src_ir = expr->operands[1];
2100         zero_on_left = true;
2101      } else if (expr->operands[1]->is_zero()) {
2102         src_ir = expr->operands[0];
2103         zero_on_left = false;
2104      }
2105
2106      /*      a is -  0  +            -  0  +
2107       * (a <  0)  T  F  F  ( a < 0)  T  F  F
2108       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
2109       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2110       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2111       * (a >  0)  F  F  T  (-a < 0)  F  F  T
2112       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
2113       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2114       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2115       *
2116       * Note that exchanging the order of 0 and 'a' in the comparison simply
2117       * means that the value of 'a' should be negated.
2118       */
2119      if (src_ir != ir) {
2120         switch (expr->operation) {
2121         case ir_binop_less:
2122            switch_order = false;
2123            negate = zero_on_left;
2124            break;
2125
2126         case ir_binop_greater:
2127            switch_order = false;
2128            negate = !zero_on_left;
2129            break;
2130
2131         case ir_binop_lequal:
2132            switch_order = true;
2133            negate = !zero_on_left;
2134            break;
2135
2136         case ir_binop_gequal:
2137            switch_order = true;
2138            negate = zero_on_left;
2139            break;
2140
2141         default:
2142            /* This isn't the right kind of comparison afterall, so make sure
2143             * the whole condition is visited.
2144             */
2145            src_ir = ir;
2146            break;
2147         }
2148      }
2149   }
2150
2151   src_ir->accept(this);
2152
2153   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2154    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
2155    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2156    * computing the condition.
2157    */
2158   if (negate)
2159      this->result.negate = ~this->result.negate;
2160
2161   return switch_order;
2162}
2163
2164void
2165glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2166{
2167   st_dst_reg l;
2168   st_src_reg r;
2169   int i;
2170
2171   ir->rhs->accept(this);
2172   r = this->result;
2173
2174   l = get_assignment_lhs(ir->lhs, this);
2175
2176   /* FINISHME: This should really set to the correct maximal writemask for each
2177    * FINISHME: component written (in the loops below).  This case can only
2178    * FINISHME: occur for matrices, arrays, and structures.
2179    */
2180   if (ir->write_mask == 0) {
2181      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2182      l.writemask = WRITEMASK_XYZW;
2183   } else if (ir->lhs->type->is_scalar() &&
2184              ir->lhs->variable_referenced()->mode == ir_var_out) {
2185      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2186       * FINISHME: W component of fragment shader output zero, work correctly.
2187       */
2188      l.writemask = WRITEMASK_XYZW;
2189   } else {
2190      int swizzles[4];
2191      int first_enabled_chan = 0;
2192      int rhs_chan = 0;
2193
2194      l.writemask = ir->write_mask;
2195
2196      for (int i = 0; i < 4; i++) {
2197         if (l.writemask & (1 << i)) {
2198            first_enabled_chan = GET_SWZ(r.swizzle, i);
2199            break;
2200         }
2201      }
2202
2203      /* Swizzle a small RHS vector into the channels being written.
2204       *
2205       * glsl ir treats write_mask as dictating how many channels are
2206       * present on the RHS while TGSI treats write_mask as just
2207       * showing which channels of the vec4 RHS get written.
2208       */
2209      for (int i = 0; i < 4; i++) {
2210         if (l.writemask & (1 << i))
2211            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2212         else
2213            swizzles[i] = first_enabled_chan;
2214      }
2215      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2216        			swizzles[2], swizzles[3]);
2217   }
2218
2219   assert(l.file != PROGRAM_UNDEFINED);
2220   assert(r.file != PROGRAM_UNDEFINED);
2221
2222   if (ir->condition) {
2223      const bool switch_order = this->process_move_condition(ir->condition);
2224      st_src_reg condition = this->result;
2225
2226      for (i = 0; i < type_size(ir->lhs->type); i++) {
2227         st_src_reg l_src = st_src_reg(l);
2228         st_src_reg condition_temp = condition;
2229         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
2230
2231         if (native_integers) {
2232            /* This is necessary because TGSI's CMP instruction expects the
2233             * condition to be a float, and we store booleans as integers.
2234             * If TGSI had a UCMP instruction or similar, this extra
2235             * instruction would not be necessary.
2236             */
2237            condition_temp = get_temp(glsl_type::vec4_type);
2238            condition.negate = 0;
2239            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
2240            condition_temp.swizzle = condition.swizzle;
2241         }
2242
2243         if (switch_order) {
2244            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
2245         } else {
2246            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
2247         }
2248
2249         l.index++;
2250         r.index++;
2251      }
2252   } else if (ir->rhs->as_expression() &&
2253              this->instructions.get_tail() &&
2254              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2255              type_size(ir->lhs->type) == 1 &&
2256              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
2257      /* To avoid emitting an extra MOV when assigning an expression to a
2258       * variable, emit the last instruction of the expression again, but
2259       * replace the destination register with the target of the assignment.
2260       * Dead code elimination will remove the original instruction.
2261       */
2262      glsl_to_tgsi_instruction *inst, *new_inst;
2263      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2264      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
2265      new_inst->saturate = inst->saturate;
2266      inst->dead_mask = inst->dst.writemask;
2267   } else {
2268      for (i = 0; i < type_size(ir->lhs->type); i++) {
2269         emit(ir, TGSI_OPCODE_MOV, l, r);
2270         l.index++;
2271         r.index++;
2272      }
2273   }
2274}
2275
2276
2277void
2278glsl_to_tgsi_visitor::visit(ir_constant *ir)
2279{
2280   st_src_reg src;
2281   GLfloat stack_vals[4] = { 0 };
2282   gl_constant_value *values = (gl_constant_value *) stack_vals;
2283   GLenum gl_type = GL_NONE;
2284   unsigned int i;
2285   static int in_array = 0;
2286   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
2287
2288   /* Unfortunately, 4 floats is all we can get into
2289    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
2290    * aggregate constant and move each constant value into it.  If we
2291    * get lucky, copy propagation will eliminate the extra moves.
2292    */
2293   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2294      st_src_reg temp_base = get_temp(ir->type);
2295      st_dst_reg temp = st_dst_reg(temp_base);
2296
2297      foreach_iter(exec_list_iterator, iter, ir->components) {
2298         ir_constant *field_value = (ir_constant *)iter.get();
2299         int size = type_size(field_value->type);
2300
2301         assert(size > 0);
2302
2303         field_value->accept(this);
2304         src = this->result;
2305
2306         for (i = 0; i < (unsigned int)size; i++) {
2307            emit(ir, TGSI_OPCODE_MOV, temp, src);
2308
2309            src.index++;
2310            temp.index++;
2311         }
2312      }
2313      this->result = temp_base;
2314      return;
2315   }
2316
2317   if (ir->type->is_array()) {
2318      st_src_reg temp_base = get_temp(ir->type);
2319      st_dst_reg temp = st_dst_reg(temp_base);
2320      int size = type_size(ir->type->fields.array);
2321
2322      assert(size > 0);
2323      in_array++;
2324
2325      for (i = 0; i < ir->type->length; i++) {
2326         ir->array_elements[i]->accept(this);
2327         src = this->result;
2328         for (int j = 0; j < size; j++) {
2329            emit(ir, TGSI_OPCODE_MOV, temp, src);
2330
2331            src.index++;
2332            temp.index++;
2333         }
2334      }
2335      this->result = temp_base;
2336      in_array--;
2337      return;
2338   }
2339
2340   if (ir->type->is_matrix()) {
2341      st_src_reg mat = get_temp(ir->type);
2342      st_dst_reg mat_column = st_dst_reg(mat);
2343
2344      for (i = 0; i < ir->type->matrix_columns; i++) {
2345         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2346         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2347
2348         src = st_src_reg(file, -1, ir->type->base_type);
2349         src.index = add_constant(file,
2350                                  values,
2351                                  ir->type->vector_elements,
2352                                  GL_FLOAT,
2353                                  &src.swizzle);
2354         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2355
2356         mat_column.index++;
2357      }
2358
2359      this->result = mat;
2360      return;
2361   }
2362
2363   switch (ir->type->base_type) {
2364   case GLSL_TYPE_FLOAT:
2365      gl_type = GL_FLOAT;
2366      for (i = 0; i < ir->type->vector_elements; i++) {
2367         values[i].f = ir->value.f[i];
2368      }
2369      break;
2370   case GLSL_TYPE_UINT:
2371      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
2372      for (i = 0; i < ir->type->vector_elements; i++) {
2373         if (native_integers)
2374            values[i].u = ir->value.u[i];
2375         else
2376            values[i].f = ir->value.u[i];
2377      }
2378      break;
2379   case GLSL_TYPE_INT:
2380      gl_type = native_integers ? GL_INT : GL_FLOAT;
2381      for (i = 0; i < ir->type->vector_elements; i++) {
2382         if (native_integers)
2383            values[i].i = ir->value.i[i];
2384         else
2385            values[i].f = ir->value.i[i];
2386      }
2387      break;
2388   case GLSL_TYPE_BOOL:
2389      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
2390      for (i = 0; i < ir->type->vector_elements; i++) {
2391         if (native_integers)
2392            values[i].b = ir->value.b[i];
2393         else
2394            values[i].f = ir->value.b[i];
2395      }
2396      break;
2397   default:
2398      assert(!"Non-float/uint/int/bool constant");
2399   }
2400
2401   this->result = st_src_reg(file, -1, ir->type);
2402   this->result.index = add_constant(file,
2403                                     values,
2404                                     ir->type->vector_elements,
2405                                     gl_type,
2406                                     &this->result.swizzle);
2407}
2408
2409function_entry *
2410glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2411{
2412   function_entry *entry;
2413
2414   foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2415      entry = (function_entry *)iter.get();
2416
2417      if (entry->sig == sig)
2418         return entry;
2419   }
2420
2421   entry = ralloc(mem_ctx, function_entry);
2422   entry->sig = sig;
2423   entry->sig_id = this->next_signature_id++;
2424   entry->bgn_inst = NULL;
2425
2426   /* Allocate storage for all the parameters. */
2427   foreach_iter(exec_list_iterator, iter, sig->parameters) {
2428      ir_variable *param = (ir_variable *)iter.get();
2429      variable_storage *storage;
2430
2431      storage = find_variable_storage(param);
2432      assert(!storage);
2433
2434      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2435        				      this->next_temp);
2436      this->variables.push_tail(storage);
2437
2438      this->next_temp += type_size(param->type);
2439   }
2440
2441   if (!sig->return_type->is_void()) {
2442      entry->return_reg = get_temp(sig->return_type);
2443   } else {
2444      entry->return_reg = undef_src;
2445   }
2446
2447   this->function_signatures.push_tail(entry);
2448   return entry;
2449}
2450
2451void
2452glsl_to_tgsi_visitor::visit(ir_call *ir)
2453{
2454   glsl_to_tgsi_instruction *call_inst;
2455   ir_function_signature *sig = ir->get_callee();
2456   function_entry *entry = get_function_signature(sig);
2457   int i;
2458
2459   /* Process in parameters. */
2460   exec_list_iterator sig_iter = sig->parameters.iterator();
2461   foreach_iter(exec_list_iterator, iter, *ir) {
2462      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2463      ir_variable *param = (ir_variable *)sig_iter.get();
2464
2465      if (param->mode == ir_var_in ||
2466          param->mode == ir_var_inout) {
2467         variable_storage *storage = find_variable_storage(param);
2468         assert(storage);
2469
2470         param_rval->accept(this);
2471         st_src_reg r = this->result;
2472
2473         st_dst_reg l;
2474         l.file = storage->file;
2475         l.index = storage->index;
2476         l.reladdr = NULL;
2477         l.writemask = WRITEMASK_XYZW;
2478         l.cond_mask = COND_TR;
2479
2480         for (i = 0; i < type_size(param->type); i++) {
2481            emit(ir, TGSI_OPCODE_MOV, l, r);
2482            l.index++;
2483            r.index++;
2484         }
2485      }
2486
2487      sig_iter.next();
2488   }
2489   assert(!sig_iter.has_next());
2490
2491   /* Emit call instruction */
2492   call_inst = emit(ir, TGSI_OPCODE_CAL);
2493   call_inst->function = entry;
2494
2495   /* Process out parameters. */
2496   sig_iter = sig->parameters.iterator();
2497   foreach_iter(exec_list_iterator, iter, *ir) {
2498      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2499      ir_variable *param = (ir_variable *)sig_iter.get();
2500
2501      if (param->mode == ir_var_out ||
2502          param->mode == ir_var_inout) {
2503         variable_storage *storage = find_variable_storage(param);
2504         assert(storage);
2505
2506         st_src_reg r;
2507         r.file = storage->file;
2508         r.index = storage->index;
2509         r.reladdr = NULL;
2510         r.swizzle = SWIZZLE_NOOP;
2511         r.negate = 0;
2512
2513         param_rval->accept(this);
2514         st_dst_reg l = st_dst_reg(this->result);
2515
2516         for (i = 0; i < type_size(param->type); i++) {
2517            emit(ir, TGSI_OPCODE_MOV, l, r);
2518            l.index++;
2519            r.index++;
2520         }
2521      }
2522
2523      sig_iter.next();
2524   }
2525   assert(!sig_iter.has_next());
2526
2527   /* Process return value. */
2528   this->result = entry->return_reg;
2529}
2530
2531void
2532glsl_to_tgsi_visitor::visit(ir_texture *ir)
2533{
2534   st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
2535   st_dst_reg result_dst, coord_dst;
2536   glsl_to_tgsi_instruction *inst = NULL;
2537   unsigned opcode = TGSI_OPCODE_NOP;
2538
2539   if (ir->coordinate) {
2540      ir->coordinate->accept(this);
2541
2542      /* Put our coords in a temp.  We'll need to modify them for shadow,
2543       * projection, or LOD, so the only case we'd use it as is is if
2544       * we're doing plain old texturing.  The optimization passes on
2545       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
2546       */
2547      coord = get_temp(glsl_type::vec4_type);
2548      coord_dst = st_dst_reg(coord);
2549      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2550   }
2551
2552   if (ir->projector) {
2553      ir->projector->accept(this);
2554      projector = this->result;
2555   }
2556
2557   /* Storage for our result.  Ideally for an assignment we'd be using
2558    * the actual storage for the result here, instead.
2559    */
2560   result_src = get_temp(glsl_type::vec4_type);
2561   result_dst = st_dst_reg(result_src);
2562
2563   switch (ir->op) {
2564   case ir_tex:
2565      opcode = TGSI_OPCODE_TEX;
2566      break;
2567   case ir_txb:
2568      opcode = TGSI_OPCODE_TXB;
2569      ir->lod_info.bias->accept(this);
2570      lod_info = this->result;
2571      break;
2572   case ir_txl:
2573      opcode = TGSI_OPCODE_TXL;
2574      ir->lod_info.lod->accept(this);
2575      lod_info = this->result;
2576      break;
2577   case ir_txd:
2578      opcode = TGSI_OPCODE_TXD;
2579      ir->lod_info.grad.dPdx->accept(this);
2580      dx = this->result;
2581      ir->lod_info.grad.dPdy->accept(this);
2582      dy = this->result;
2583      break;
2584   case ir_txs:
2585      opcode = TGSI_OPCODE_TXQ;
2586      ir->lod_info.lod->accept(this);
2587      lod_info = this->result;
2588      break;
2589   case ir_txf:
2590      opcode = TGSI_OPCODE_TXF;
2591      ir->lod_info.lod->accept(this);
2592      lod_info = this->result;
2593      if (ir->offset) {
2594	 ir->offset->accept(this);
2595	 offset = this->result;
2596      }
2597      break;
2598   }
2599
2600   const glsl_type *sampler_type = ir->sampler->type;
2601
2602   if (ir->projector) {
2603      if (opcode == TGSI_OPCODE_TEX) {
2604         /* Slot the projector in as the last component of the coord. */
2605         coord_dst.writemask = WRITEMASK_W;
2606         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2607         coord_dst.writemask = WRITEMASK_XYZW;
2608         opcode = TGSI_OPCODE_TXP;
2609      } else {
2610         st_src_reg coord_w = coord;
2611         coord_w.swizzle = SWIZZLE_WWWW;
2612
2613         /* For the other TEX opcodes there's no projective version
2614          * since the last slot is taken up by LOD info.  Do the
2615          * projective divide now.
2616          */
2617         coord_dst.writemask = WRITEMASK_W;
2618         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2619
2620         /* In the case where we have to project the coordinates "by hand,"
2621          * the shadow comparator value must also be projected.
2622          */
2623         st_src_reg tmp_src = coord;
2624         if (ir->shadow_comparitor) {
2625            /* Slot the shadow value in as the second to last component of the
2626             * coord.
2627             */
2628            ir->shadow_comparitor->accept(this);
2629
2630            tmp_src = get_temp(glsl_type::vec4_type);
2631            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2632
2633	    /* Projective division not allowed for array samplers. */
2634	    assert(!sampler_type->sampler_array);
2635
2636            tmp_dst.writemask = WRITEMASK_Z;
2637            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2638
2639            tmp_dst.writemask = WRITEMASK_XY;
2640            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2641         }
2642
2643         coord_dst.writemask = WRITEMASK_XYZ;
2644         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2645
2646         coord_dst.writemask = WRITEMASK_XYZW;
2647         coord.swizzle = SWIZZLE_XYZW;
2648      }
2649   }
2650
2651   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2652    * comparator was put in the correct place (and projected) by the code,
2653    * above, that handles by-hand projection.
2654    */
2655   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2656      /* Slot the shadow value in as the second to last component of the
2657       * coord.
2658       */
2659      ir->shadow_comparitor->accept(this);
2660
2661      /* XXX This will need to be updated for cubemap array samplers. */
2662      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2663          sampler_type->sampler_array) {
2664         coord_dst.writemask = WRITEMASK_W;
2665      } else {
2666         coord_dst.writemask = WRITEMASK_Z;
2667      }
2668
2669      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2670      coord_dst.writemask = WRITEMASK_XYZW;
2671   }
2672
2673   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
2674       opcode == TGSI_OPCODE_TXF) {
2675      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2676      coord_dst.writemask = WRITEMASK_W;
2677      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2678      coord_dst.writemask = WRITEMASK_XYZW;
2679   }
2680
2681   if (opcode == TGSI_OPCODE_TXD)
2682      inst = emit(ir, opcode, result_dst, coord, dx, dy);
2683   else if (opcode == TGSI_OPCODE_TXQ)
2684      inst = emit(ir, opcode, result_dst, lod_info);
2685   else if (opcode == TGSI_OPCODE_TXF) {
2686      inst = emit(ir, opcode, result_dst, coord);
2687   } else
2688      inst = emit(ir, opcode, result_dst, coord);
2689
2690   if (ir->shadow_comparitor)
2691      inst->tex_shadow = GL_TRUE;
2692
2693   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2694        					   this->shader_program,
2695        					   this->prog);
2696
2697   if (ir->offset) {
2698       inst->tex_offset_num_offset = 1;
2699       inst->tex_offsets[0].Index = offset.index;
2700       inst->tex_offsets[0].File = offset.file;
2701       inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
2702       inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
2703       inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
2704   }
2705
2706   switch (sampler_type->sampler_dimensionality) {
2707   case GLSL_SAMPLER_DIM_1D:
2708      inst->tex_target = (sampler_type->sampler_array)
2709         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2710      break;
2711   case GLSL_SAMPLER_DIM_2D:
2712      inst->tex_target = (sampler_type->sampler_array)
2713         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2714      break;
2715   case GLSL_SAMPLER_DIM_3D:
2716      inst->tex_target = TEXTURE_3D_INDEX;
2717      break;
2718   case GLSL_SAMPLER_DIM_CUBE:
2719      inst->tex_target = TEXTURE_CUBE_INDEX;
2720      break;
2721   case GLSL_SAMPLER_DIM_RECT:
2722      inst->tex_target = TEXTURE_RECT_INDEX;
2723      break;
2724   case GLSL_SAMPLER_DIM_BUF:
2725      assert(!"FINISHME: Implement ARB_texture_buffer_object");
2726      break;
2727   default:
2728      assert(!"Should not get here.");
2729   }
2730
2731   this->result = result_src;
2732}
2733
2734void
2735glsl_to_tgsi_visitor::visit(ir_return *ir)
2736{
2737   if (ir->get_value()) {
2738      st_dst_reg l;
2739      int i;
2740
2741      assert(current_function);
2742
2743      ir->get_value()->accept(this);
2744      st_src_reg r = this->result;
2745
2746      l = st_dst_reg(current_function->return_reg);
2747
2748      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2749         emit(ir, TGSI_OPCODE_MOV, l, r);
2750         l.index++;
2751         r.index++;
2752      }
2753   }
2754
2755   emit(ir, TGSI_OPCODE_RET);
2756}
2757
2758void
2759glsl_to_tgsi_visitor::visit(ir_discard *ir)
2760{
2761   struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2762
2763   if (ir->condition) {
2764      ir->condition->accept(this);
2765      this->result.negate = ~this->result.negate;
2766      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2767   } else {
2768      emit(ir, TGSI_OPCODE_KILP);
2769   }
2770
2771   fp->UsesKill = GL_TRUE;
2772}
2773
2774void
2775glsl_to_tgsi_visitor::visit(ir_if *ir)
2776{
2777   glsl_to_tgsi_instruction *cond_inst, *if_inst;
2778   glsl_to_tgsi_instruction *prev_inst;
2779
2780   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2781
2782   ir->condition->accept(this);
2783   assert(this->result.file != PROGRAM_UNDEFINED);
2784
2785   if (this->options->EmitCondCodes) {
2786      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2787
2788      /* See if we actually generated any instruction for generating
2789       * the condition.  If not, then cook up a move to a temp so we
2790       * have something to set cond_update on.
2791       */
2792      if (cond_inst == prev_inst) {
2793         st_src_reg temp = get_temp(glsl_type::bool_type);
2794         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2795      }
2796      cond_inst->cond_update = GL_TRUE;
2797
2798      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2799      if_inst->dst.cond_mask = COND_NE;
2800   } else {
2801      if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2802   }
2803
2804   this->instructions.push_tail(if_inst);
2805
2806   visit_exec_list(&ir->then_instructions, this);
2807
2808   if (!ir->else_instructions.is_empty()) {
2809      emit(ir->condition, TGSI_OPCODE_ELSE);
2810      visit_exec_list(&ir->else_instructions, this);
2811   }
2812
2813   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2814}
2815
2816glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2817{
2818   result.file = PROGRAM_UNDEFINED;
2819   next_temp = 1;
2820   next_signature_id = 1;
2821   num_immediates = 0;
2822   current_function = NULL;
2823   num_address_regs = 0;
2824   indirect_addr_temps = false;
2825   indirect_addr_consts = false;
2826   mem_ctx = ralloc_context(NULL);
2827}
2828
2829glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2830{
2831   ralloc_free(mem_ctx);
2832}
2833
2834extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2835{
2836   delete v;
2837}
2838
2839
2840/**
2841 * Count resources used by the given gpu program (number of texture
2842 * samplers, etc).
2843 */
2844static void
2845count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2846{
2847   v->samplers_used = 0;
2848
2849   foreach_iter(exec_list_iterator, iter, v->instructions) {
2850      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2851
2852      if (is_tex_instruction(inst->op)) {
2853         v->samplers_used |= 1 << inst->sampler;
2854
2855         prog->SamplerTargets[inst->sampler] =
2856            (gl_texture_index)inst->tex_target;
2857         if (inst->tex_shadow) {
2858            prog->ShadowSamplers |= 1 << inst->sampler;
2859         }
2860      }
2861   }
2862
2863   prog->SamplersUsed = v->samplers_used;
2864   _mesa_update_shader_textures_used(prog);
2865}
2866
2867
2868/**
2869 * Check if the given vertex/fragment/shader program is within the
2870 * resource limits of the context (number of texture units, etc).
2871 * If any of those checks fail, record a linker error.
2872 *
2873 * XXX more checks are needed...
2874 */
2875static void
2876check_resources(const struct gl_context *ctx,
2877                struct gl_shader_program *shader_program,
2878                glsl_to_tgsi_visitor *prog,
2879                struct gl_program *proginfo)
2880{
2881   switch (proginfo->Target) {
2882   case GL_VERTEX_PROGRAM_ARB:
2883      if (_mesa_bitcount(prog->samplers_used) >
2884          ctx->Const.MaxVertexTextureImageUnits) {
2885         fail_link(shader_program, "Too many vertex shader texture samplers");
2886      }
2887      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2888         fail_link(shader_program, "Too many vertex shader constants");
2889      }
2890      break;
2891   case MESA_GEOMETRY_PROGRAM:
2892      if (_mesa_bitcount(prog->samplers_used) >
2893          ctx->Const.MaxGeometryTextureImageUnits) {
2894         fail_link(shader_program, "Too many geometry shader texture samplers");
2895      }
2896      if (proginfo->Parameters->NumParameters >
2897          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2898         fail_link(shader_program, "Too many geometry shader constants");
2899      }
2900      break;
2901   case GL_FRAGMENT_PROGRAM_ARB:
2902      if (_mesa_bitcount(prog->samplers_used) >
2903          ctx->Const.MaxTextureImageUnits) {
2904         fail_link(shader_program, "Too many fragment shader texture samplers");
2905      }
2906      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2907         fail_link(shader_program, "Too many fragment shader constants");
2908      }
2909      break;
2910   default:
2911      _mesa_problem(ctx, "unexpected program type in check_resources()");
2912   }
2913}
2914
2915
2916
/**
 * Sort record pairing a uniform with the parameter-list position the
 * linker assigned to it for the shader target being processed.
 */
struct uniform_sort {
   struct gl_uniform *u;   /* the uniform itself */
   int pos;                /* its position for the current target */
};

/* The shader_program->Uniforms list is almost sorted in increasing
 * uniform->{Frag,Vert}Pos locations, but not quite when there are
 * uniforms shared between targets.  We need to add parameters in
 * increasing order for the targets.
 *
 * qsort() comparator over struct uniform_sort: orders by ascending pos.
 * Returns a negative, zero or positive value when a sorts before, equal
 * to, or after b.  The result is built from two comparisons rather than a
 * subtraction so that it cannot overflow for any pair of int positions.
 */
static int
sort_uniforms(const void *a, const void *b)
{
   const struct uniform_sort *lhs = (const struct uniform_sort *)a;
   const struct uniform_sort *rhs = (const struct uniform_sort *)b;

   return (lhs->pos > rhs->pos) - (lhs->pos < rhs->pos);
}
2936/* Add the uniforms to the parameters.  The linker chose locations
2937 * in our parameters lists (which weren't created yet), which the
2938 * uniforms code will use to poke values into our parameters list
2939 * when uniforms are updated.
2940 */
2941static void
2942add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2943        			struct gl_shader *shader,
2944        			struct gl_program *prog)
2945{
2946   unsigned int i;
2947   unsigned int next_sampler = 0, num_uniforms = 0;
2948   struct uniform_sort *sorted_uniforms;
2949
2950   sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2951        			  shader_program->Uniforms->NumUniforms);
2952
2953   for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2954      struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2955      int parameter_index = -1;
2956
2957      switch (shader->Type) {
2958      case GL_VERTEX_SHADER:
2959         parameter_index = uniform->VertPos;
2960         break;
2961      case GL_FRAGMENT_SHADER:
2962         parameter_index = uniform->FragPos;
2963         break;
2964      case GL_GEOMETRY_SHADER:
2965         parameter_index = uniform->GeomPos;
2966         break;
2967      }
2968
2969      /* Only add uniforms used in our target. */
2970      if (parameter_index != -1) {
2971         sorted_uniforms[num_uniforms].pos = parameter_index;
2972         sorted_uniforms[num_uniforms].u = uniform;
2973         num_uniforms++;
2974      }
2975   }
2976
2977   qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2978         sort_uniforms);
2979
2980   for (i = 0; i < num_uniforms; i++) {
2981      struct gl_uniform *uniform = sorted_uniforms[i].u;
2982      int parameter_index = sorted_uniforms[i].pos;
2983      const glsl_type *type = uniform->Type;
2984      unsigned int size;
2985
2986      if (type->is_vector() ||
2987          type->is_scalar()) {
2988         size = type->vector_elements;
2989      } else {
2990         size = type_size(type) * 4;
2991      }
2992
2993      gl_register_file file;
2994      if (type->is_sampler() ||
2995          (type->is_array() && type->fields.array->is_sampler())) {
2996         file = PROGRAM_SAMPLER;
2997      } else {
2998         file = PROGRAM_UNIFORM;
2999      }
3000
3001      GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
3002        					 uniform->Name);
3003
3004      if (index < 0) {
3005         index = _mesa_add_parameter(prog->Parameters, file,
3006        			     uniform->Name, size, type->gl_type,
3007        			     NULL, NULL, 0x0);
3008
3009         /* Sampler uniform values are stored in prog->SamplerUnits,
3010          * and the entry in that array is selected by this index we
3011          * store in ParameterValues[].
3012          */
3013         if (file == PROGRAM_SAMPLER) {
3014            for (unsigned int j = 0; j < size / 4; j++)
3015               prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
3016         }
3017
3018         /* The location chosen in the Parameters list here (returned
3019          * from _mesa_add_uniform) has to match what the linker chose.
3020          */
3021         if (index != parameter_index) {
3022            fail_link(shader_program, "Allocation of uniform `%s' to target "
3023        	      "failed (%d vs %d)\n",
3024        	      uniform->Name, index, parameter_index);
3025         }
3026      }
3027   }
3028
3029   ralloc_free(sorted_uniforms);
3030}
3031
3032static void
3033set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
3034        		struct gl_shader_program *shader_program,
3035        		const char *name, const glsl_type *type,
3036        		ir_constant *val)
3037{
3038   if (type->is_record()) {
3039      ir_constant *field_constant;
3040
3041      field_constant = (ir_constant *)val->components.get_head();
3042
3043      for (unsigned int i = 0; i < type->length; i++) {
3044         const glsl_type *field_type = type->fields.structure[i].type;
3045         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
3046        				    type->fields.structure[i].name);
3047         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
3048        			 field_type, field_constant);
3049         field_constant = (ir_constant *)field_constant->next;
3050      }
3051      return;
3052   }
3053
3054   int loc = _mesa_get_uniform_location(ctx, shader_program, name);
3055
3056   if (loc == -1) {
3057      fail_link(shader_program,
3058        	"Couldn't find uniform for initializer %s\n", name);
3059      return;
3060   }
3061
3062   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
3063      ir_constant *element;
3064      const glsl_type *element_type;
3065      if (type->is_array()) {
3066         element = val->array_elements[i];
3067         element_type = type->fields.array;
3068      } else {
3069         element = val;
3070         element_type = type;
3071      }
3072
3073      void *values;
3074
3075      if (element_type->base_type == GLSL_TYPE_BOOL) {
3076         int *conv = ralloc_array(mem_ctx, int, element_type->components());
3077         for (unsigned int j = 0; j < element_type->components(); j++) {
3078            conv[j] = element->value.b[j];
3079         }
3080         values = (void *)conv;
3081         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
3082        					element_type->vector_elements,
3083        					1);
3084      } else {
3085         values = &element->value;
3086      }
3087
3088      if (element_type->is_matrix()) {
3089         _mesa_uniform_matrix(ctx, shader_program,
3090        		      element_type->matrix_columns,
3091        		      element_type->vector_elements,
3092        		      loc, 1, GL_FALSE, (GLfloat *)values);
3093         loc += element_type->matrix_columns;
3094      } else {
3095         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
3096        	       values, element_type->gl_type);
3097         loc += type_size(element_type);
3098      }
3099   }
3100}
3101
3102/*
3103 * Scan/rewrite program to remove reads of custom (output) registers.
3104 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
3105 * (for vertex shaders).
3106 * In GLSL shaders, varying vars can be read and written.
3107 * On some hardware, trying to read an output register causes trouble.
3108 * So, rewrite the program to use a temporary register in this case.
3109 *
3110 * Based on _mesa_remove_output_reads from programopt.c.
3111 */
3112void
3113glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
3114{
3115   GLuint i;
3116   GLint outputMap[VERT_RESULT_MAX];
3117   GLint outputTypes[VERT_RESULT_MAX];
3118   GLuint numVaryingReads = 0;
3119   GLboolean usedTemps[MAX_TEMPS];
3120   GLuint firstTemp = 0;
3121
3122   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
3123                             usedTemps, MAX_TEMPS);
3124
3125   assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
3126   assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
3127
3128   for (i = 0; i < VERT_RESULT_MAX; i++)
3129      outputMap[i] = -1;
3130
3131   /* look for instructions which read from varying vars */
3132   foreach_iter(exec_list_iterator, iter, this->instructions) {
3133      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3134      const GLuint numSrc = num_inst_src_regs(inst->op);
3135      GLuint j;
3136      for (j = 0; j < numSrc; j++) {
3137         if (inst->src[j].file == type) {
3138            /* replace the read with a temp reg */
3139            const GLuint var = inst->src[j].index;
3140            if (outputMap[var] == -1) {
3141               numVaryingReads++;
3142               outputMap[var] = _mesa_find_free_register(usedTemps,
3143                                                         MAX_TEMPS,
3144                                                         firstTemp);
3145               outputTypes[var] = inst->src[j].type;
3146               firstTemp = outputMap[var] + 1;
3147            }
3148            inst->src[j].file = PROGRAM_TEMPORARY;
3149            inst->src[j].index = outputMap[var];
3150         }
3151      }
3152   }
3153
3154   if (numVaryingReads == 0)
3155      return; /* nothing to be done */
3156
3157   /* look for instructions which write to the varying vars identified above */
3158   foreach_iter(exec_list_iterator, iter, this->instructions) {
3159      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3160      if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
3161         /* change inst to write to the temp reg, instead of the varying */
3162         inst->dst.file = PROGRAM_TEMPORARY;
3163         inst->dst.index = outputMap[inst->dst.index];
3164      }
3165   }
3166
3167   /* insert new MOV instructions at the end */
3168   for (i = 0; i < VERT_RESULT_MAX; i++) {
3169      if (outputMap[i] >= 0) {
3170         /* MOV VAR[i], TEMP[tmp]; */
3171         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
3172         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
3173         dst.index = i;
3174         this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
3175      }
3176   }
3177}
3178
3179/**
3180 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
3181 * are read from the given src in this instruction
3182 */
3183static int
3184get_src_arg_mask(st_dst_reg dst, st_src_reg src)
3185{
3186   int read_mask = 0, comp;
3187
3188   /* Now, given the src swizzle and the written channels, find which
3189    * components are actually read
3190    */
3191   for (comp = 0; comp < 4; ++comp) {
3192      const unsigned coord = GET_SWZ(src.swizzle, comp);
3193      ASSERT(coord < 4);
3194      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
3195         read_mask |= 1 << coord;
3196   }
3197
3198   return read_mask;
3199}
3200
3201/**
3202 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
3203 * instruction is the first instruction to write to register T0.  There are
3204 * several lowering passes done in GLSL IR (e.g. branches and
3205 * relative addressing) that create a large number of conditional assignments
3206 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
3207 *
3208 * Here is why this conversion is safe:
3209 * CMP T0, T1 T2 T0 can be expanded to:
3210 * if (T1 < 0.0)
3211 * 	MOV T0, T2;
3212 * else
3213 * 	MOV T0, T0;
3214 *
3215 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
3216 * as the original program.  If (T1 < 0.0) evaluates to false, executing
3217 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
3218 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
3219 * because any instruction that was going to read from T0 after this was going
3220 * to read a garbage value anyway.
3221 */
3222void
3223glsl_to_tgsi_visitor::simplify_cmp(void)
3224{
3225   unsigned tempWrites[MAX_TEMPS];
3226   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
3227
3228   memset(tempWrites, 0, sizeof(tempWrites));
3229   memset(outputWrites, 0, sizeof(outputWrites));
3230
3231   foreach_iter(exec_list_iterator, iter, this->instructions) {
3232      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3233      unsigned prevWriteMask = 0;
3234
3235      /* Give up if we encounter relative addressing or flow control. */
3236      if (inst->dst.reladdr ||
3237          tgsi_get_opcode_info(inst->op)->is_branch ||
3238          inst->op == TGSI_OPCODE_BGNSUB ||
3239          inst->op == TGSI_OPCODE_CONT ||
3240          inst->op == TGSI_OPCODE_END ||
3241          inst->op == TGSI_OPCODE_ENDSUB ||
3242          inst->op == TGSI_OPCODE_RET) {
3243         return;
3244      }
3245
3246      if (inst->dst.file == PROGRAM_OUTPUT) {
3247         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
3248         prevWriteMask = outputWrites[inst->dst.index];
3249         outputWrites[inst->dst.index] |= inst->dst.writemask;
3250      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
3251         assert(inst->dst.index < MAX_TEMPS);
3252         prevWriteMask = tempWrites[inst->dst.index];
3253         tempWrites[inst->dst.index] |= inst->dst.writemask;
3254      }
3255
3256      /* For a CMP to be considered a conditional write, the destination
3257       * register and source register two must be the same. */
3258      if (inst->op == TGSI_OPCODE_CMP
3259          && !(inst->dst.writemask & prevWriteMask)
3260          && inst->src[2].file == inst->dst.file
3261          && inst->src[2].index == inst->dst.index
3262          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
3263
3264         inst->op = TGSI_OPCODE_MOV;
3265         inst->src[0] = inst->src[1];
3266      }
3267   }
3268}
3269
3270/* Replaces all references to a temporary register index with another index. */
3271void
3272glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3273{
3274   foreach_iter(exec_list_iterator, iter, this->instructions) {
3275      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3276      unsigned j;
3277
3278      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3279         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3280             inst->src[j].index == index) {
3281            inst->src[j].index = new_index;
3282         }
3283      }
3284
3285      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3286         inst->dst.index = new_index;
3287      }
3288   }
3289}
3290
3291int
3292glsl_to_tgsi_visitor::get_first_temp_read(int index)
3293{
3294   int depth = 0; /* loop depth */
3295   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3296   unsigned i = 0, j;
3297
3298   foreach_iter(exec_list_iterator, iter, this->instructions) {
3299      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3300
3301      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3302         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3303             inst->src[j].index == index) {
3304            return (depth == 0) ? i : loop_start;
3305         }
3306      }
3307
3308      if (inst->op == TGSI_OPCODE_BGNLOOP) {
3309         if(depth++ == 0)
3310            loop_start = i;
3311      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3312         if (--depth == 0)
3313            loop_start = -1;
3314      }
3315      assert(depth >= 0);
3316
3317      i++;
3318   }
3319
3320   return -1;
3321}
3322
3323int
3324glsl_to_tgsi_visitor::get_first_temp_write(int index)
3325{
3326   int depth = 0; /* loop depth */
3327   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3328   int i = 0;
3329
3330   foreach_iter(exec_list_iterator, iter, this->instructions) {
3331      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3332
3333      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3334         return (depth == 0) ? i : loop_start;
3335      }
3336
3337      if (inst->op == TGSI_OPCODE_BGNLOOP) {
3338         if(depth++ == 0)
3339            loop_start = i;
3340      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3341         if (--depth == 0)
3342            loop_start = -1;
3343      }
3344      assert(depth >= 0);
3345
3346      i++;
3347   }
3348
3349   return -1;
3350}
3351
3352int
3353glsl_to_tgsi_visitor::get_last_temp_read(int index)
3354{
3355   int depth = 0; /* loop depth */
3356   int last = -1; /* index of last instruction that reads the temporary */
3357   unsigned i = 0, j;
3358
3359   foreach_iter(exec_list_iterator, iter, this->instructions) {
3360      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3361
3362      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3363         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3364             inst->src[j].index == index) {
3365            last = (depth == 0) ? i : -2;
3366         }
3367      }
3368
3369      if (inst->op == TGSI_OPCODE_BGNLOOP)
3370         depth++;
3371      else if (inst->op == TGSI_OPCODE_ENDLOOP)
3372         if (--depth == 0 && last == -2)
3373            last = i;
3374      assert(depth >= 0);
3375
3376      i++;
3377   }
3378
3379   assert(last >= -1);
3380   return last;
3381}
3382
3383int
3384glsl_to_tgsi_visitor::get_last_temp_write(int index)
3385{
3386   int depth = 0; /* loop depth */
3387   int last = -1; /* index of last instruction that writes to the temporary */
3388   int i = 0;
3389
3390   foreach_iter(exec_list_iterator, iter, this->instructions) {
3391      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3392
3393      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3394         last = (depth == 0) ? i : -2;
3395
3396      if (inst->op == TGSI_OPCODE_BGNLOOP)
3397         depth++;
3398      else if (inst->op == TGSI_OPCODE_ENDLOOP)
3399         if (--depth == 0 && last == -2)
3400            last = i;
3401      assert(depth >= 0);
3402
3403      i++;
3404   }
3405
3406   assert(last >= -1);
3407   return last;
3408}
3409
3410/*
3411 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3412 * channels for copy propagation and updates following instructions to
3413 * use the original versions.
3414 *
3415 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3416 * will occur.  As an example, a TXP production before this pass:
3417 *
3418 * 0: MOV TEMP[1], INPUT[4].xyyy;
3419 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3420 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3421 *
3422 * and after:
3423 *
3424 * 0: MOV TEMP[1], INPUT[4].xyyy;
3425 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3426 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3427 *
3428 * which allows for dead code elimination on TEMP[1]'s writes.
3429 */
3430void
3431glsl_to_tgsi_visitor::copy_propagate(void)
3432{
3433   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3434        					    glsl_to_tgsi_instruction *,
3435        					    this->next_temp * 4);
3436   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3437   int level = 0;
3438
3439   foreach_iter(exec_list_iterator, iter, this->instructions) {
3440      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3441
3442      assert(inst->dst.file != PROGRAM_TEMPORARY
3443             || inst->dst.index < this->next_temp);
3444
3445      /* First, do any copy propagation possible into the src regs. */
3446      for (int r = 0; r < 3; r++) {
3447         glsl_to_tgsi_instruction *first = NULL;
3448         bool good = true;
3449         int acp_base = inst->src[r].index * 4;
3450
3451         if (inst->src[r].file != PROGRAM_TEMPORARY ||
3452             inst->src[r].reladdr)
3453            continue;
3454
3455         /* See if we can find entries in the ACP consisting of MOVs
3456          * from the same src register for all the swizzled channels
3457          * of this src register reference.
3458          */
3459         for (int i = 0; i < 4; i++) {
3460            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3461            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3462
3463            if (!copy_chan) {
3464               good = false;
3465               break;
3466            }
3467
3468            assert(acp_level[acp_base + src_chan] <= level);
3469
3470            if (!first) {
3471               first = copy_chan;
3472            } else {
3473               if (first->src[0].file != copy_chan->src[0].file ||
3474        	   first->src[0].index != copy_chan->src[0].index) {
3475        	  good = false;
3476        	  break;
3477               }
3478            }
3479         }
3480
3481         if (good) {
3482            /* We've now validated that we can copy-propagate to
3483             * replace this src register reference.  Do it.
3484             */
3485            inst->src[r].file = first->src[0].file;
3486            inst->src[r].index = first->src[0].index;
3487
3488            int swizzle = 0;
3489            for (int i = 0; i < 4; i++) {
3490               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3491               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3492               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3493        		   (3 * i));
3494            }
3495            inst->src[r].swizzle = swizzle;
3496         }
3497      }
3498
3499      switch (inst->op) {
3500      case TGSI_OPCODE_BGNLOOP:
3501      case TGSI_OPCODE_ENDLOOP:
3502         /* End of a basic block, clear the ACP entirely. */
3503         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3504         break;
3505
3506      case TGSI_OPCODE_IF:
3507         ++level;
3508         break;
3509
3510      case TGSI_OPCODE_ENDIF:
3511      case TGSI_OPCODE_ELSE:
3512         /* Clear all channels written inside the block from the ACP, but
3513          * leaving those that were not touched.
3514          */
3515         for (int r = 0; r < this->next_temp; r++) {
3516            for (int c = 0; c < 4; c++) {
3517               if (!acp[4 * r + c])
3518        	  continue;
3519
3520               if (acp_level[4 * r + c] >= level)
3521        	  acp[4 * r + c] = NULL;
3522            }
3523         }
3524         if (inst->op == TGSI_OPCODE_ENDIF)
3525            --level;
3526         break;
3527
3528      default:
3529         /* Continuing the block, clear any written channels from
3530          * the ACP.
3531          */
3532         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3533            /* Any temporary might be written, so no copy propagation
3534             * across this instruction.
3535             */
3536            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3537         } else if (inst->dst.file == PROGRAM_OUTPUT &&
3538        	    inst->dst.reladdr) {
3539            /* Any output might be written, so no copy propagation
3540             * from outputs across this instruction.
3541             */
3542            for (int r = 0; r < this->next_temp; r++) {
3543               for (int c = 0; c < 4; c++) {
3544        	  if (!acp[4 * r + c])
3545        	     continue;
3546
3547        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3548        	     acp[4 * r + c] = NULL;
3549               }
3550            }
3551         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3552        	    inst->dst.file == PROGRAM_OUTPUT) {
3553            /* Clear where it's used as dst. */
3554            if (inst->dst.file == PROGRAM_TEMPORARY) {
3555               for (int c = 0; c < 4; c++) {
3556        	  if (inst->dst.writemask & (1 << c)) {
3557        	     acp[4 * inst->dst.index + c] = NULL;
3558        	  }
3559               }
3560            }
3561
3562            /* Clear where it's used as src. */
3563            for (int r = 0; r < this->next_temp; r++) {
3564               for (int c = 0; c < 4; c++) {
3565        	  if (!acp[4 * r + c])
3566        	     continue;
3567
3568        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3569
3570        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3571        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
3572        	      inst->dst.writemask & (1 << src_chan))
3573        	  {
3574        	     acp[4 * r + c] = NULL;
3575        	  }
3576               }
3577            }
3578         }
3579         break;
3580      }
3581
3582      /* If this is a copy, add it to the ACP. */
3583      if (inst->op == TGSI_OPCODE_MOV &&
3584          inst->dst.file == PROGRAM_TEMPORARY &&
3585          !inst->dst.reladdr &&
3586          !inst->saturate &&
3587          !inst->src[0].reladdr &&
3588          !inst->src[0].negate) {
3589         for (int i = 0; i < 4; i++) {
3590            if (inst->dst.writemask & (1 << i)) {
3591               acp[4 * inst->dst.index + i] = inst;
3592               acp_level[4 * inst->dst.index + i] = level;
3593            }
3594         }
3595      }
3596   }
3597
3598   ralloc_free(acp_level);
3599   ralloc_free(acp);
3600}
3601
3602/*
3603 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3604 *
3605 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3606 * will occur.  As an example, a TXP production after copy propagation but
3607 * before this pass:
3608 *
3609 * 0: MOV TEMP[1], INPUT[4].xyyy;
3610 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3611 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3612 *
3613 * and after this pass:
3614 *
3615 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3616 *
3617 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3618 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3619 */
3620void
3621glsl_to_tgsi_visitor::eliminate_dead_code(void)
3622{
3623   int i;
3624
3625   for (i=0; i < this->next_temp; i++) {
3626      int last_read = get_last_temp_read(i);
3627      int j = 0;
3628
3629      foreach_iter(exec_list_iterator, iter, this->instructions) {
3630         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3631
3632         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3633             j > last_read)
3634         {
3635            iter.remove();
3636            delete inst;
3637         }
3638
3639         j++;
3640      }
3641   }
3642}
3643
3644/*
3645 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3646 * code elimination.  This is less primitive than eliminate_dead_code(), as it
3647 * is per-channel and can detect consecutive writes without a read between them
3648 * as dead code.  However, there is some dead code that can be eliminated by
3649 * eliminate_dead_code() but not this function - for example, this function
3650 * cannot eliminate an instruction writing to a register that is never read and
3651 * is the only instruction writing to that register.
3652 *
3653 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3654 * will occur.
3655 */
3656int
3657glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3658{
3659   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3660                                                     glsl_to_tgsi_instruction *,
3661                                                     this->next_temp * 4);
3662   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3663   int level = 0;
3664   int removed = 0;
3665
3666   foreach_iter(exec_list_iterator, iter, this->instructions) {
3667      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3668
3669      assert(inst->dst.file != PROGRAM_TEMPORARY
3670             || inst->dst.index < this->next_temp);
3671
3672      switch (inst->op) {
3673      case TGSI_OPCODE_BGNLOOP:
3674      case TGSI_OPCODE_ENDLOOP:
3675         /* End of a basic block, clear the write array entirely.
3676          * FIXME: This keeps us from killing dead code when the writes are
3677          * on either side of a loop, even when the register isn't touched
3678          * inside the loop.
3679          */
3680         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3681         break;
3682
3683      case TGSI_OPCODE_ENDIF:
3684         --level;
3685         break;
3686
3687      case TGSI_OPCODE_ELSE:
3688         /* Clear all channels written inside the preceding if block from the
3689          * write array, but leave those that were not touched.
3690          *
3691          * FIXME: This destroys opportunities to remove dead code inside of
3692          * IF blocks that are followed by an ELSE block.
3693          */
3694         for (int r = 0; r < this->next_temp; r++) {
3695            for (int c = 0; c < 4; c++) {
3696               if (!writes[4 * r + c])
3697        	         continue;
3698
3699               if (write_level[4 * r + c] >= level)
3700        	         writes[4 * r + c] = NULL;
3701            }
3702         }
3703         break;
3704
3705      case TGSI_OPCODE_IF:
3706         ++level;
3707         /* fallthrough to default case to mark the condition as read */
3708
3709      default:
3710         /* Continuing the block, clear any channels from the write array that
3711          * are read by this instruction.
3712          */
3713         for (unsigned i = 0; i < Elements(inst->src); i++) {
3714            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3715               /* Any temporary might be read, so no dead code elimination
3716                * across this instruction.
3717                */
3718               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3719            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3720               /* Clear where it's used as src. */
3721               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3722               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3723               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3724               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3725
3726               for (int c = 0; c < 4; c++) {
3727              	   if (src_chans & (1 << c)) {
3728              	      writes[4 * inst->src[i].index + c] = NULL;
3729              	   }
3730               }
3731            }
3732         }
3733         break;
3734      }
3735
3736      /* If this instruction writes to a temporary, add it to the write array.
3737       * If there is already an instruction in the write array for one or more
3738       * of the channels, flag that channel write as dead.
3739       */
3740      if (inst->dst.file == PROGRAM_TEMPORARY &&
3741          !inst->dst.reladdr &&
3742          !inst->saturate) {
3743         for (int c = 0; c < 4; c++) {
3744            if (inst->dst.writemask & (1 << c)) {
3745               if (writes[4 * inst->dst.index + c]) {
3746                  if (write_level[4 * inst->dst.index + c] < level)
3747                     continue;
3748                  else
3749                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3750               }
3751               writes[4 * inst->dst.index + c] = inst;
3752               write_level[4 * inst->dst.index + c] = level;
3753            }
3754         }
3755      }
3756   }
3757
3758   /* Anything still in the write array at this point is dead code. */
3759   for (int r = 0; r < this->next_temp; r++) {
3760      for (int c = 0; c < 4; c++) {
3761         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3762         if (inst)
3763            inst->dead_mask |= (1 << c);
3764      }
3765   }
3766
3767   /* Now actually remove the instructions that are completely dead and update
3768    * the writemask of other instructions with dead channels.
3769    */
3770   foreach_iter(exec_list_iterator, iter, this->instructions) {
3771      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3772
3773      if (!inst->dead_mask || !inst->dst.writemask)
3774         continue;
3775      else if (inst->dead_mask == inst->dst.writemask) {
3776         iter.remove();
3777         delete inst;
3778         removed++;
3779      } else
3780         inst->dst.writemask &= ~(inst->dead_mask);
3781   }
3782
3783   ralloc_free(write_level);
3784   ralloc_free(writes);
3785
3786   return removed;
3787}
3788
3789/* Merges temporary registers together where possible to reduce the number of
3790 * registers needed to run a program.
3791 *
3792 * Produces optimal code only after copy propagation and dead code elimination
3793 * have been run. */
3794void
3795glsl_to_tgsi_visitor::merge_registers(void)
3796{
3797   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3798   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3799   int i, j;
3800
3801   /* Read the indices of the last read and first write to each temp register
3802    * into an array so that we don't have to traverse the instruction list as
3803    * much. */
3804   for (i=0; i < this->next_temp; i++) {
3805      last_reads[i] = get_last_temp_read(i);
3806      first_writes[i] = get_first_temp_write(i);
3807   }
3808
3809   /* Start looking for registers with non-overlapping usages that can be
3810    * merged together. */
3811   for (i=0; i < this->next_temp; i++) {
3812      /* Don't touch unused registers. */
3813      if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3814
3815      for (j=0; j < this->next_temp; j++) {
3816         /* Don't touch unused registers. */
3817         if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3818
3819         /* We can merge the two registers if the first write to j is after or
3820          * in the same instruction as the last read from i.  Note that the
3821          * register at index i will always be used earlier or at the same time
3822          * as the register at index j. */
3823         if (first_writes[i] <= first_writes[j] &&
3824             last_reads[i] <= first_writes[j])
3825         {
3826            rename_temp_register(j, i); /* Replace all references to j with i.*/
3827
3828            /* Update the first_writes and last_reads arrays with the new
3829             * values for the merged register index, and mark the newly unused
3830             * register index as such. */
3831            last_reads[i] = last_reads[j];
3832            first_writes[j] = -1;
3833            last_reads[j] = -1;
3834         }
3835      }
3836   }
3837
3838   ralloc_free(last_reads);
3839   ralloc_free(first_writes);
3840}
3841
3842/* Reassign indices to temporary registers by reusing unused indices created
3843 * by optimization passes. */
3844void
3845glsl_to_tgsi_visitor::renumber_registers(void)
3846{
3847   int i = 0;
3848   int new_index = 0;
3849
3850   for (i=0; i < this->next_temp; i++) {
3851      if (get_first_temp_read(i) < 0) continue;
3852      if (i != new_index)
3853         rename_temp_register(i, new_index);
3854      new_index++;
3855   }
3856
3857   this->next_temp = new_index;
3858}
3859
3860/**
3861 * Returns a fragment program which implements the current pixel transfer ops.
3862 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3863 */
3864extern "C" void
3865get_pixel_transfer_visitor(struct st_fragment_program *fp,
3866                           glsl_to_tgsi_visitor *original,
3867                           int scale_and_bias, int pixel_maps)
3868{
3869   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3870   struct st_context *st = st_context(original->ctx);
3871   struct gl_program *prog = &fp->Base.Base;
3872   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3873   st_src_reg coord, src0;
3874   st_dst_reg dst0;
3875   glsl_to_tgsi_instruction *inst;
3876
3877   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3878   v->ctx = original->ctx;
3879   v->prog = prog;
3880   v->glsl_version = original->glsl_version;
3881   v->native_integers = original->native_integers;
3882   v->options = original->options;
3883   v->next_temp = original->next_temp;
3884   v->num_address_regs = original->num_address_regs;
3885   v->samplers_used = prog->SamplersUsed = original->samplers_used;
3886   v->indirect_addr_temps = original->indirect_addr_temps;
3887   v->indirect_addr_consts = original->indirect_addr_consts;
3888   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3889
3890   /*
3891    * Get initial pixel color from the texture.
3892    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3893    */
3894   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3895   src0 = v->get_temp(glsl_type::vec4_type);
3896   dst0 = st_dst_reg(src0);
3897   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3898   inst->sampler = 0;
3899   inst->tex_target = TEXTURE_2D_INDEX;
3900
3901   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3902   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3903   v->samplers_used |= (1 << 0);
3904
3905   if (scale_and_bias) {
3906      static const gl_state_index scale_state[STATE_LENGTH] =
3907         { STATE_INTERNAL, STATE_PT_SCALE,
3908           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3909      static const gl_state_index bias_state[STATE_LENGTH] =
3910         { STATE_INTERNAL, STATE_PT_BIAS,
3911           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3912      GLint scale_p, bias_p;
3913      st_src_reg scale, bias;
3914
3915      scale_p = _mesa_add_state_reference(params, scale_state);
3916      bias_p = _mesa_add_state_reference(params, bias_state);
3917
3918      /* MAD colorTemp, colorTemp, scale, bias; */
3919      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3920      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3921      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3922   }
3923
3924   if (pixel_maps) {
3925      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3926      st_dst_reg temp_dst = st_dst_reg(temp);
3927
3928      assert(st->pixel_xfer.pixelmap_texture);
3929
3930      /* With a little effort, we can do four pixel map look-ups with
3931       * two TEX instructions:
3932       */
3933
3934      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3935      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3936      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3937      inst->sampler = 1;
3938      inst->tex_target = TEXTURE_2D_INDEX;
3939
3940      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3941      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3942      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3943      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3944      inst->sampler = 1;
3945      inst->tex_target = TEXTURE_2D_INDEX;
3946
3947      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3948      v->samplers_used |= (1 << 1);
3949
3950      /* MOV colorTemp, temp; */
3951      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3952   }
3953
3954   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3955    * new visitor. */
3956   foreach_iter(exec_list_iterator, iter, original->instructions) {
3957      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3958      st_src_reg src_regs[3];
3959
3960      if (inst->dst.file == PROGRAM_OUTPUT)
3961         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3962
3963      for (int i=0; i<3; i++) {
3964         src_regs[i] = inst->src[i];
3965         if (src_regs[i].file == PROGRAM_INPUT &&
3966             src_regs[i].index == FRAG_ATTRIB_COL0)
3967         {
3968            src_regs[i].file = PROGRAM_TEMPORARY;
3969            src_regs[i].index = src0.index;
3970         }
3971         else if (src_regs[i].file == PROGRAM_INPUT)
3972            prog->InputsRead |= (1 << src_regs[i].index);
3973      }
3974
3975      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3976   }
3977
3978   /* Make modifications to fragment program info. */
3979   prog->Parameters = _mesa_combine_parameter_lists(params,
3980                                                    original->prog->Parameters);
3981   _mesa_free_parameter_list(params);
3982   count_resources(v, prog);
3983   fp->glsl_to_tgsi = v;
3984}
3985
3986/**
3987 * Make fragment program for glBitmap:
3988 *   Sample the texture and kill the fragment if the bit is 0.
3989 * This program will be combined with the user's fragment program.
3990 *
3991 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3992 */
3993extern "C" void
3994get_bitmap_visitor(struct st_fragment_program *fp,
3995                   glsl_to_tgsi_visitor *original, int samplerIndex)
3996{
3997   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3998   struct st_context *st = st_context(original->ctx);
3999   struct gl_program *prog = &fp->Base.Base;
4000   st_src_reg coord, src0;
4001   st_dst_reg dst0;
4002   glsl_to_tgsi_instruction *inst;
4003
4004   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
4005   v->ctx = original->ctx;
4006   v->prog = prog;
4007   v->glsl_version = original->glsl_version;
4008   v->native_integers = original->native_integers;
4009   v->options = original->options;
4010   v->next_temp = original->next_temp;
4011   v->num_address_regs = original->num_address_regs;
4012   v->samplers_used = prog->SamplersUsed = original->samplers_used;
4013   v->indirect_addr_temps = original->indirect_addr_temps;
4014   v->indirect_addr_consts = original->indirect_addr_consts;
4015   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
4016
4017   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
4018   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
4019   src0 = v->get_temp(glsl_type::vec4_type);
4020   dst0 = st_dst_reg(src0);
4021   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
4022   inst->sampler = samplerIndex;
4023   inst->tex_target = TEXTURE_2D_INDEX;
4024
4025   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
4026   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
4027   v->samplers_used |= (1 << samplerIndex);
4028
4029   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
4030   src0.negate = NEGATE_XYZW;
4031   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
4032      src0.swizzle = SWIZZLE_XXXX;
4033   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
4034
4035   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
4036    * new visitor. */
4037   foreach_iter(exec_list_iterator, iter, original->instructions) {
4038      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
4039      st_src_reg src_regs[3];
4040
4041      if (inst->dst.file == PROGRAM_OUTPUT)
4042         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
4043
4044      for (int i=0; i<3; i++) {
4045         src_regs[i] = inst->src[i];
4046         if (src_regs[i].file == PROGRAM_INPUT)
4047            prog->InputsRead |= (1 << src_regs[i].index);
4048      }
4049
4050      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
4051   }
4052
4053   /* Make modifications to fragment program info. */
4054   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
4055   count_resources(v, prog);
4056   fp->glsl_to_tgsi = v;
4057}
4058
4059/* ------------------------- TGSI conversion stuff -------------------------- */
/** One recorded branch: the target passed to get_label() plus the token
 *  slot whose address get_label() hands back to its caller. */
struct label {
   unsigned branch_target;
   unsigned token;
};
4064
4065/**
4066 * Intermediate state used during shader translation.
4067 */
4068struct st_translate {
4069   struct ureg_program *ureg;
4070
4071   struct ureg_dst temps[MAX_TEMPS];
4072   struct ureg_src *constants;
4073   struct ureg_src *immediates;
4074   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
4075   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
4076   struct ureg_dst address[1];
4077   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
4078   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
4079
4080   /* Extra info for handling point size clamping in vertex shader */
4081   struct ureg_dst pointSizeResult; /**< Actual point size output register */
4082   struct ureg_src pointSizeConst;  /**< Point size range constant register */
4083   GLint pointSizeOutIndex;         /**< Temp point size output register */
4084   GLboolean prevInstWrotePointSize;
4085
4086   const GLuint *inputMapping;
4087   const GLuint *outputMapping;
4088
4089   /* For every instruction that contains a label (eg CALL), keep
4090    * details so that we can go back afterwards and emit the correct
4091    * tgsi instruction number for each label.
4092    */
4093   struct label *labels;
4094   unsigned labels_size;
4095   unsigned labels_count;
4096
4097   /* Keep a record of the tgsi instruction number that each mesa
4098    * instruction starts at, will be used to fix up labels after
4099    * translation.
4100    */
4101   unsigned *insn;
4102   unsigned insn_size;
4103   unsigned insn_count;
4104
4105   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
4106
4107   boolean error;
4108};
4109
4110/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
4111static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
4112   TGSI_SEMANTIC_FACE,
4113   TGSI_SEMANTIC_INSTANCEID
4114};
4115
4116/**
4117 * Make note of a branch to a label in the TGSI code.
4118 * After we've emitted all instructions, we'll go over the list
4119 * of labels built here and patch the TGSI code with the actual
4120 * location of each label.
4121 */
4122static unsigned *get_label(struct st_translate *t, unsigned branch_target)
4123{
4124   unsigned i;
4125
4126   if (t->labels_count + 1 >= t->labels_size) {
4127      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
4128      t->labels = (struct label *)realloc(t->labels,
4129                                          t->labels_size * sizeof(struct label));
4130      if (t->labels == NULL) {
4131         static unsigned dummy;
4132         t->error = TRUE;
4133         return &dummy;
4134      }
4135   }
4136
4137   i = t->labels_count++;
4138   t->labels[i].branch_target = branch_target;
4139   return &t->labels[i].token;
4140}
4141
4142/**
4143 * Called prior to emitting the TGSI code for each instruction.
4144 * Allocate additional space for instructions if needed.
4145 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
4146 * the next TGSI instruction.
4147 */
4148static void set_insn_start(struct st_translate *t, unsigned start)
4149{
4150   if (t->insn_count + 1 >= t->insn_size) {
4151      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
4152      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
4153      if (t->insn == NULL) {
4154         t->error = TRUE;
4155         return;
4156      }
4157   }
4158
4159   t->insn[t->insn_count++] = start;
4160}
4161
4162/**
4163 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
4164 */
4165static struct ureg_src
4166emit_immediate(struct st_translate *t,
4167               gl_constant_value values[4],
4168               int type, int size)
4169{
4170   struct ureg_program *ureg = t->ureg;
4171
4172   switch(type)
4173   {
4174   case GL_FLOAT:
4175      return ureg_DECL_immediate(ureg, &values[0].f, size);
4176   case GL_INT:
4177      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
4178   case GL_UNSIGNED_INT:
4179   case GL_BOOL:
4180      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
4181   default:
4182      assert(!"should not get here - type must be float, int, uint, or bool");
4183      return ureg_src_undef();
4184   }
4185}
4186
4187/**
4188 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
4189 */
4190static struct ureg_dst
4191dst_register(struct st_translate *t,
4192             gl_register_file file,
4193             GLuint index)
4194{
4195   switch(file) {
4196   case PROGRAM_UNDEFINED:
4197      return ureg_dst_undef();
4198
4199   case PROGRAM_TEMPORARY:
4200      if (ureg_dst_is_undef(t->temps[index]))
4201         t->temps[index] = ureg_DECL_temporary(t->ureg);
4202
4203      return t->temps[index];
4204
4205   case PROGRAM_OUTPUT:
4206      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
4207         t->prevInstWrotePointSize = GL_TRUE;
4208
4209      if (t->procType == TGSI_PROCESSOR_VERTEX)
4210         assert(index < VERT_RESULT_MAX);
4211      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
4212         assert(index < FRAG_RESULT_MAX);
4213      else
4214         assert(index < GEOM_RESULT_MAX);
4215
4216      assert(t->outputMapping[index] < Elements(t->outputs));
4217
4218      return t->outputs[t->outputMapping[index]];
4219
4220   case PROGRAM_ADDRESS:
4221      return t->address[index];
4222
4223   default:
4224      assert(!"unknown dst register file");
4225      return ureg_dst_undef();
4226   }
4227}
4228
4229/**
4230 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
4231 */
4232static struct ureg_src
4233src_register(struct st_translate *t,
4234             gl_register_file file,
4235             GLuint index)
4236{
4237   switch(file) {
4238   case PROGRAM_UNDEFINED:
4239      return ureg_src_undef();
4240
4241   case PROGRAM_TEMPORARY:
4242      assert(index >= 0);
4243      assert(index < Elements(t->temps));
4244      if (ureg_dst_is_undef(t->temps[index]))
4245         t->temps[index] = ureg_DECL_temporary(t->ureg);
4246      return ureg_src(t->temps[index]);
4247
4248   case PROGRAM_NAMED_PARAM:
4249   case PROGRAM_ENV_PARAM:
4250   case PROGRAM_LOCAL_PARAM:
4251   case PROGRAM_UNIFORM:
4252      assert(index >= 0);
4253      return t->constants[index];
4254   case PROGRAM_STATE_VAR:
4255   case PROGRAM_CONSTANT:       /* ie, immediate */
4256      if (index < 0)
4257         return ureg_DECL_constant(t->ureg, 0);
4258      else
4259         return t->constants[index];
4260
4261   case PROGRAM_IMMEDIATE:
4262      return t->immediates[index];
4263
4264   case PROGRAM_INPUT:
4265      assert(t->inputMapping[index] < Elements(t->inputs));
4266      return t->inputs[t->inputMapping[index]];
4267
4268   case PROGRAM_OUTPUT:
4269      assert(t->outputMapping[index] < Elements(t->outputs));
4270      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
4271
4272   case PROGRAM_ADDRESS:
4273      return ureg_src(t->address[index]);
4274
4275   case PROGRAM_SYSTEM_VALUE:
4276      assert(index < Elements(t->systemValues));
4277      return t->systemValues[index];
4278
4279   default:
4280      assert(!"unknown src register file");
4281      return ureg_src_undef();
4282   }
4283}
4284
4285/**
4286 * Create a TGSI ureg_dst register from an st_dst_reg.
4287 */
4288static struct ureg_dst
4289translate_dst(struct st_translate *t,
4290              const st_dst_reg *dst_reg,
4291              bool saturate)
4292{
4293   struct ureg_dst dst = dst_register(t,
4294                                      dst_reg->file,
4295                                      dst_reg->index);
4296
4297   dst = ureg_writemask(dst, dst_reg->writemask);
4298
4299   if (saturate)
4300      dst = ureg_saturate(dst);
4301
4302   if (dst_reg->reladdr != NULL)
4303      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
4304
4305   return dst;
4306}
4307
4308/**
4309 * Create a TGSI ureg_src register from an st_src_reg.
4310 */
4311static struct ureg_src
4312translate_src(struct st_translate *t, const st_src_reg *src_reg)
4313{
4314   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
4315
4316   src = ureg_swizzle(src,
4317                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
4318                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
4319                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
4320                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
4321
4322   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4323      src = ureg_negate(src);
4324
4325   if (src_reg->reladdr != NULL) {
4326      /* Normally ureg_src_indirect() would be used here, but a stupid compiler
4327       * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
4328       * set the bit for src.Negate.  So we have to do the operation manually
4329       * here to work around the compiler's problems. */
4330      /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
4331      struct ureg_src addr = ureg_src(t->address[0]);
4332      src.Indirect = 1;
4333      src.IndirectFile = addr.File;
4334      src.IndirectIndex = addr.Index;
4335      src.IndirectSwizzle = addr.SwizzleX;
4336
4337      if (src_reg->file != PROGRAM_INPUT &&
4338          src_reg->file != PROGRAM_OUTPUT) {
4339         /* If src_reg->index was negative, it was set to zero in
4340          * src_register().  Reassign it now.  But don't do this
4341          * for input/output regs since they get remapped while
4342          * const buffers don't.
4343          */
4344         src.Index = src_reg->index;
4345      }
4346   }
4347
4348   return src;
4349}
4350
4351static struct tgsi_texture_offset
4352translate_tex_offset(struct st_translate *t,
4353                     const struct tgsi_texture_offset *in_offset)
4354{
4355   struct tgsi_texture_offset offset;
4356
4357   assert(in_offset->File == PROGRAM_IMMEDIATE);
4358
4359   offset.File = TGSI_FILE_IMMEDIATE;
4360   offset.Index = in_offset->Index;
4361   offset.SwizzleX = in_offset->SwizzleX;
4362   offset.SwizzleY = in_offset->SwizzleY;
4363   offset.SwizzleZ = in_offset->SwizzleZ;
4364
4365   return offset;
4366}
4367
4368static void
4369compile_tgsi_instruction(struct st_translate *t,
4370                         const glsl_to_tgsi_instruction *inst)
4371{
4372   struct ureg_program *ureg = t->ureg;
4373   GLuint i;
4374   struct ureg_dst dst[1];
4375   struct ureg_src src[4];
4376   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
4377
4378   unsigned num_dst;
4379   unsigned num_src;
4380
4381   num_dst = num_inst_dst_regs(inst->op);
4382   num_src = num_inst_src_regs(inst->op);
4383
4384   if (num_dst)
4385      dst[0] = translate_dst(t,
4386                             &inst->dst,
4387                             inst->saturate);
4388
4389   for (i = 0; i < num_src; i++)
4390      src[i] = translate_src(t, &inst->src[i]);
4391
4392   switch(inst->op) {
4393   case TGSI_OPCODE_BGNLOOP:
4394   case TGSI_OPCODE_CAL:
4395   case TGSI_OPCODE_ELSE:
4396   case TGSI_OPCODE_ENDLOOP:
4397   case TGSI_OPCODE_IF:
4398      assert(num_dst == 0);
4399      ureg_label_insn(ureg,
4400                      inst->op,
4401                      src, num_src,
4402                      get_label(t,
4403                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4404      return;
4405
4406   case TGSI_OPCODE_TEX:
4407   case TGSI_OPCODE_TXB:
4408   case TGSI_OPCODE_TXD:
4409   case TGSI_OPCODE_TXL:
4410   case TGSI_OPCODE_TXP:
4411   case TGSI_OPCODE_TXQ:
4412   case TGSI_OPCODE_TXF:
4413      src[num_src++] = t->samplers[inst->sampler];
4414      for (i = 0; i < inst->tex_offset_num_offset; i++) {
4415         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
4416      }
4417      ureg_tex_insn(ureg,
4418                    inst->op,
4419                    dst, num_dst,
4420                    translate_texture_target(inst->tex_target, inst->tex_shadow),
4421                    texoffsets, inst->tex_offset_num_offset,
4422                    src, num_src);
4423      return;
4424
4425   case TGSI_OPCODE_SCS:
4426      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4427      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4428      break;
4429
4430   default:
4431      ureg_insn(ureg,
4432                inst->op,
4433                dst, num_dst,
4434                src, num_src);
4435      break;
4436   }
4437}
4438
4439/**
4440 * Emit the TGSI instructions to adjust the WPOS pixel center convention
4441 * Basically, add (adjX, adjY) to the fragment position.
4442 */
4443static void
4444emit_adjusted_wpos(struct st_translate *t,
4445                   const struct gl_program *program,
4446                   float adjX, float adjY)
4447{
4448   struct ureg_program *ureg = t->ureg;
4449   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
4450   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4451
4452   /* Note that we bias X and Y and pass Z and W through unchanged.
4453    * The shader might also use gl_FragCoord.w and .z.
4454    */
4455   ureg_ADD(ureg, wpos_temp, wpos_input,
4456            ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
4457
4458   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4459}
4460
4461
4462/**
4463 * Emit the TGSI instructions for inverting the WPOS y coordinate.
4464 * This code is unavoidable because it also depends on whether
4465 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4466 */
4467static void
4468emit_wpos_inversion(struct st_translate *t,
4469                    const struct gl_program *program,
4470                    bool invert)
4471{
4472   struct ureg_program *ureg = t->ureg;
4473
4474   /* Fragment program uses fragment position input.
4475    * Need to replace instances of INPUT[WPOS] with temp T
4476    * where T = INPUT[WPOS] by y is inverted.
4477    */
4478   static const gl_state_index wposTransformState[STATE_LENGTH]
4479      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4480          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4481
4482   /* XXX: note we are modifying the incoming shader here!  Need to
4483    * do this before emitting the constant decls below, or this
4484    * will be missed:
4485    */
4486   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4487                                                       wposTransformState);
4488
4489   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
4490   struct ureg_dst wpos_temp;
4491   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4492
4493   /* MOV wpos_temp, input[wpos]
4494    */
4495   if (wpos_input.File == TGSI_FILE_TEMPORARY)
4496      wpos_temp = ureg_dst(wpos_input);
4497   else {
4498      wpos_temp = ureg_DECL_temporary(ureg);
4499      ureg_MOV(ureg, wpos_temp, wpos_input);
4500   }
4501
4502   if (invert) {
4503      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4504       */
4505      ureg_MAD(ureg,
4506               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4507               wpos_input,
4508               ureg_scalar(wpostrans, 0),
4509               ureg_scalar(wpostrans, 1));
4510   } else {
4511      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4512       */
4513      ureg_MAD(ureg,
4514               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4515               wpos_input,
4516               ureg_scalar(wpostrans, 2),
4517               ureg_scalar(wpostrans, 3));
4518   }
4519
4520   /* Use wpos_temp as position input from here on:
4521    */
4522   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4523}
4524
4525
/**
 * Emit fragment position/coordinate code.
 */
4529static void
4530emit_wpos(struct st_context *st,
4531          struct st_translate *t,
4532          const struct gl_program *program,
4533          struct ureg_program *ureg)
4534{
4535   const struct gl_fragment_program *fp =
4536      (const struct gl_fragment_program *) program;
4537   struct pipe_screen *pscreen = st->pipe->screen;
4538   boolean invert = FALSE;
4539
4540   if (fp->OriginUpperLeft) {
4541      /* Fragment shader wants origin in upper-left */
4542      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4543         /* the driver supports upper-left origin */
4544      }
4545      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4546         /* the driver supports lower-left origin, need to invert Y */
4547         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4548         invert = TRUE;
4549      }
4550      else
4551         assert(0);
4552   }
4553   else {
4554      /* Fragment shader wants origin in lower-left */
4555      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4556         /* the driver supports lower-left origin */
4557         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4558      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4559         /* the driver supports upper-left origin, need to invert Y */
4560         invert = TRUE;
4561      else
4562         assert(0);
4563   }
4564
4565   if (fp->PixelCenterInteger) {
4566      /* Fragment shader wants pixel center integer */
4567      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
4568         /* the driver supports pixel center integer */
4569         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4570      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
4571         /* the driver supports pixel center half integer, need to bias X,Y */
4572         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
4573      else
4574         assert(0);
4575   }
4576   else {
4577      /* Fragment shader wants pixel center half integer */
4578      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4579         /* the driver supports pixel center half integer */
4580      }
4581      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4582         /* the driver supports pixel center integer, need to bias X,Y */
4583         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4584         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
4585      }
4586      else
4587         assert(0);
4588   }
4589
4590   /* we invert after adjustment so that we avoid the MOV to temporary,
4591    * and reuse the adjustment ADD instead */
4592   emit_wpos_inversion(t, program, invert);
4593}
4594
4595/**
4596 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4597 * TGSI uses +1 for front, -1 for back.
4598 * This function converts the TGSI value to the GL value.  Simply clamping/
4599 * saturating the value to [0,1] does the job.
4600 */
4601static void
4602emit_face_var(struct st_translate *t)
4603{
4604   struct ureg_program *ureg = t->ureg;
4605   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4606   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
4607
4608   /* MOV_SAT face_temp, input[face] */
4609   face_temp = ureg_saturate(face_temp);
4610   ureg_MOV(ureg, face_temp, face_input);
4611
4612   /* Use face_temp as face input from here on: */
4613   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
4614}
4615
4616static void
4617emit_edgeflags(struct st_translate *t)
4618{
4619   struct ureg_program *ureg = t->ureg;
4620   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
4621   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4622
4623   ureg_MOV(ureg, edge_dst, edge_src);
4624}
4625
4626/**
4627 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4628 * \param program  the program to translate
4629 * \param numInputs  number of input registers used
4630 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
4631 *                      input indexes
4632 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
4633 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
4634 *                            each input
4635 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4636 * \param numOutputs  number of output registers used
4637 * \param outputMapping  maps Mesa fragment program outputs to TGSI
4638 *                       generic outputs
4639 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
4640 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
4641 *                             each output
4642 *
4643 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4644 */
4645extern "C" enum pipe_error
4646st_translate_program(
4647   struct gl_context *ctx,
4648   uint procType,
4649   struct ureg_program *ureg,
4650   glsl_to_tgsi_visitor *program,
4651   const struct gl_program *proginfo,
4652   GLuint numInputs,
4653   const GLuint inputMapping[],
4654   const ubyte inputSemanticName[],
4655   const ubyte inputSemanticIndex[],
4656   const GLuint interpMode[],
4657   GLuint numOutputs,
4658   const GLuint outputMapping[],
4659   const ubyte outputSemanticName[],
4660   const ubyte outputSemanticIndex[],
4661   boolean passthrough_edgeflags)
4662{
4663   struct st_translate translate, *t;
4664   unsigned i;
4665   enum pipe_error ret = PIPE_OK;
4666
4667   assert(numInputs <= Elements(t->inputs));
4668   assert(numOutputs <= Elements(t->outputs));
4669
4670   t = &translate;
4671   memset(t, 0, sizeof *t);
4672
4673   t->procType = procType;
4674   t->inputMapping = inputMapping;
4675   t->outputMapping = outputMapping;
4676   t->ureg = ureg;
4677   t->pointSizeOutIndex = -1;
4678   t->prevInstWrotePointSize = GL_FALSE;
4679
4680   /*
4681    * Declare input attributes.
4682    */
4683   if (procType == TGSI_PROCESSOR_FRAGMENT) {
4684      for (i = 0; i < numInputs; i++) {
4685         t->inputs[i] = ureg_DECL_fs_input(ureg,
4686                                           inputSemanticName[i],
4687                                           inputSemanticIndex[i],
4688                                           interpMode[i]);
4689      }
4690
4691      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
4692         /* Must do this after setting up t->inputs, and before
4693          * emitting constant references, below:
4694          */
4695          emit_wpos(st_context(ctx), t, proginfo, ureg);
4696      }
4697
4698      if (proginfo->InputsRead & FRAG_BIT_FACE)
4699         emit_face_var(t);
4700
4701      /*
4702       * Declare output attributes.
4703       */
4704      for (i = 0; i < numOutputs; i++) {
4705         switch (outputSemanticName[i]) {
4706         case TGSI_SEMANTIC_POSITION:
4707            t->outputs[i] = ureg_DECL_output(ureg,
4708                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
4709                                             outputSemanticIndex[i]);
4710            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4711            break;
4712         case TGSI_SEMANTIC_STENCIL:
4713            t->outputs[i] = ureg_DECL_output(ureg,
4714                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
4715                                             outputSemanticIndex[i]);
4716            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4717            break;
4718         case TGSI_SEMANTIC_COLOR:
4719            t->outputs[i] = ureg_DECL_output(ureg,
4720                                             TGSI_SEMANTIC_COLOR,
4721                                             outputSemanticIndex[i]);
4722            break;
4723         default:
4724            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4725            return PIPE_ERROR_BAD_INPUT;
4726         }
4727      }
4728   }
4729   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4730      for (i = 0; i < numInputs; i++) {
4731         t->inputs[i] = ureg_DECL_gs_input(ureg,
4732                                           i,
4733                                           inputSemanticName[i],
4734                                           inputSemanticIndex[i]);
4735      }
4736
4737      for (i = 0; i < numOutputs; i++) {
4738         t->outputs[i] = ureg_DECL_output(ureg,
4739                                          outputSemanticName[i],
4740                                          outputSemanticIndex[i]);
4741      }
4742   }
4743   else {
4744      assert(procType == TGSI_PROCESSOR_VERTEX);
4745
4746      for (i = 0; i < numInputs; i++) {
4747         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4748      }
4749
4750      for (i = 0; i < numOutputs; i++) {
4751         t->outputs[i] = ureg_DECL_output(ureg,
4752                                          outputSemanticName[i],
4753                                          outputSemanticIndex[i]);
4754         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
4755            /* Writing to the point size result register requires special
4756             * handling to implement clamping.
4757             */
4758            static const gl_state_index pointSizeClampState[STATE_LENGTH]
4759               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4760               /* XXX: note we are modifying the incoming shader here!  Need to
4761               * do this before emitting the constant decls below, or this
4762               * will be missed.
4763               */
4764            unsigned pointSizeClampConst =
4765               _mesa_add_state_reference(proginfo->Parameters,
4766                                         pointSizeClampState);
4767            struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
4768            t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
4769            t->pointSizeResult = t->outputs[i];
4770            t->pointSizeOutIndex = i;
4771            t->outputs[i] = psizregtemp;
4772         }
4773      }
4774      if (passthrough_edgeflags)
4775         emit_edgeflags(t);
4776   }
4777
4778   /* Declare address register.
4779    */
4780   if (program->num_address_regs > 0) {
4781      assert(program->num_address_regs == 1);
4782      t->address[0] = ureg_DECL_address(ureg);
4783   }
4784
4785   /* Declare misc input registers
4786    */
4787   {
4788      GLbitfield sysInputs = proginfo->SystemValuesRead;
4789      unsigned numSys = 0;
4790      for (i = 0; sysInputs; i++) {
4791         if (sysInputs & (1 << i)) {
4792            unsigned semName = mesa_sysval_to_semantic[i];
4793            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4794            numSys++;
4795            sysInputs &= ~(1 << i);
4796         }
4797      }
4798   }
4799
4800   if (program->indirect_addr_temps) {
4801      /* If temps are accessed with indirect addressing, declare temporaries
4802       * in sequential order.  Else, we declare them on demand elsewhere.
4803       * (Note: the number of temporaries is equal to program->next_temp)
4804       */
4805      for (i = 0; i < (unsigned)program->next_temp; i++) {
4806         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4807         t->temps[i] = ureg_DECL_temporary(t->ureg);
4808      }
4809   }
4810
4811   /* Emit constants and uniforms.  TGSI uses a single index space for these,
4812    * so we put all the translated regs in t->constants.
4813    */
4814   if (proginfo->Parameters) {
4815      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
4816      if (t->constants == NULL) {
4817         ret = PIPE_ERROR_OUT_OF_MEMORY;
4818         goto out;
4819      }
4820
4821      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4822         switch (proginfo->Parameters->Parameters[i].Type) {
4823         case PROGRAM_ENV_PARAM:
4824         case PROGRAM_LOCAL_PARAM:
4825         case PROGRAM_STATE_VAR:
4826         case PROGRAM_NAMED_PARAM:
4827         case PROGRAM_UNIFORM:
4828            t->constants[i] = ureg_DECL_constant(ureg, i);
4829            break;
4830
4831         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4832          * addressing of the const buffer.
4833          * FIXME: Be smarter and recognize param arrays:
4834          * indirect addressing is only valid within the referenced
4835          * array.
4836          */
4837         case PROGRAM_CONSTANT:
4838            if (program->indirect_addr_consts)
4839               t->constants[i] = ureg_DECL_constant(ureg, i);
4840            else
4841               t->constants[i] = emit_immediate(t,
4842                                                proginfo->Parameters->ParameterValues[i],
4843                                                proginfo->Parameters->Parameters[i].DataType,
4844                                                4);
4845            break;
4846         default:
4847            break;
4848         }
4849      }
4850   }
4851
4852   /* Emit immediate values.
4853    */
4854   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
4855   if (t->immediates == NULL) {
4856      ret = PIPE_ERROR_OUT_OF_MEMORY;
4857      goto out;
4858   }
4859   i = 0;
4860   foreach_iter(exec_list_iterator, iter, program->immediates) {
4861      immediate_storage *imm = (immediate_storage *)iter.get();
4862      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
4863   }
4864
4865   /* texture samplers */
4866   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4867      if (program->samplers_used & (1 << i)) {
4868         t->samplers[i] = ureg_DECL_sampler(ureg, i);
4869      }
4870   }
4871
4872   /* Emit each instruction in turn:
4873    */
4874   foreach_iter(exec_list_iterator, iter, program->instructions) {
4875      set_insn_start(t, ureg_get_instruction_number(ureg));
4876      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
4877
4878      if (t->prevInstWrotePointSize && proginfo->Id) {
4879         /* The previous instruction wrote to the (fake) vertex point size
4880          * result register.  Now we need to clamp that value to the min/max
4881          * point size range, putting the result into the real point size
4882          * register.
4883          * Note that we can't do this easily at the end of program due to
4884          * possible early return.
4885          */
4886         set_insn_start(t, ureg_get_instruction_number(ureg));
4887         ureg_MAX(t->ureg,
4888                  ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
4889                  ureg_src(t->outputs[t->pointSizeOutIndex]),
4890                  ureg_swizzle(t->pointSizeConst, 1,1,1,1));
4891         ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
4892                  ureg_src(t->outputs[t->pointSizeOutIndex]),
4893                  ureg_swizzle(t->pointSizeConst, 2,2,2,2));
4894      }
4895      t->prevInstWrotePointSize = GL_FALSE;
4896   }
4897
4898   /* Fix up all emitted labels:
4899    */
4900   for (i = 0; i < t->labels_count; i++) {
4901      ureg_fixup_label(ureg, t->labels[i].token,
4902                       t->insn[t->labels[i].branch_target]);
4903   }
4904
4905out:
4906   FREE(t->insn);
4907   FREE(t->labels);
4908   FREE(t->constants);
4909   FREE(t->immediates);
4910
4911   if (t->error) {
4912      debug_printf("%s: translate error flag set\n", __FUNCTION__);
4913   }
4914
4915   return ret;
4916}
4917/* ----------------------------- End TGSI code ------------------------------ */
4918
4919/**
4920 * Convert a shader's GLSL IR into a Mesa gl_program, although without
4921 * generating Mesa IR.
4922 */
4923static struct gl_program *
4924get_mesa_program(struct gl_context *ctx,
4925                 struct gl_shader_program *shader_program,
4926        	 struct gl_shader *shader)
4927{
4928   glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4929   struct gl_program *prog;
4930   GLenum target;
4931   const char *target_string;
4932   bool progress;
4933   struct gl_shader_compiler_options *options =
4934         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4935
4936   switch (shader->Type) {
4937   case GL_VERTEX_SHADER:
4938      target = GL_VERTEX_PROGRAM_ARB;
4939      target_string = "vertex";
4940      break;
4941   case GL_FRAGMENT_SHADER:
4942      target = GL_FRAGMENT_PROGRAM_ARB;
4943      target_string = "fragment";
4944      break;
4945   case GL_GEOMETRY_SHADER:
4946      target = GL_GEOMETRY_PROGRAM_NV;
4947      target_string = "geometry";
4948      break;
4949   default:
4950      assert(!"should not be reached");
4951      return NULL;
4952   }
4953
4954   validate_ir_tree(shader->ir);
4955
4956   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4957   if (!prog)
4958      return NULL;
4959   prog->Parameters = _mesa_new_parameter_list();
4960   v->ctx = ctx;
4961   v->prog = prog;
4962   v->shader_program = shader_program;
4963   v->options = options;
4964   v->glsl_version = ctx->Const.GLSLVersion;
4965   v->native_integers = ctx->Const.NativeIntegers;
4966
4967   add_uniforms_to_parameters_list(shader_program, shader, prog);
4968
4969   /* Emit intermediate IR for main(). */
4970   visit_exec_list(shader->ir, v);
4971
4972   /* Now emit bodies for any functions that were used. */
4973   do {
4974      progress = GL_FALSE;
4975
4976      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4977         function_entry *entry = (function_entry *)iter.get();
4978
4979         if (!entry->bgn_inst) {
4980            v->current_function = entry;
4981
4982            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4983            entry->bgn_inst->function = entry;
4984
4985            visit_exec_list(&entry->sig->body, v);
4986
4987            glsl_to_tgsi_instruction *last;
4988            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4989            if (last->op != TGSI_OPCODE_RET)
4990               v->emit(NULL, TGSI_OPCODE_RET);
4991
4992            glsl_to_tgsi_instruction *end;
4993            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4994            end->function = entry;
4995
4996            progress = GL_TRUE;
4997         }
4998      }
4999   } while (progress);
5000
5001#if 0
5002   /* Print out some information (for debugging purposes) used by the
5003    * optimization passes. */
5004   for (i=0; i < v->next_temp; i++) {
5005      int fr = v->get_first_temp_read(i);
5006      int fw = v->get_first_temp_write(i);
5007      int lr = v->get_last_temp_read(i);
5008      int lw = v->get_last_temp_write(i);
5009
5010      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
5011      assert(fw <= fr);
5012   }
5013#endif
5014
5015   /* Remove reads to output registers, and to varyings in vertex shaders. */
5016   v->remove_output_reads(PROGRAM_OUTPUT);
5017   if (target == GL_VERTEX_PROGRAM_ARB)
5018      v->remove_output_reads(PROGRAM_VARYING);
5019
5020   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
5021   v->simplify_cmp();
5022   v->copy_propagate();
5023   while (v->eliminate_dead_code_advanced());
5024
5025   /* FIXME: These passes to optimize temporary registers don't work when there
5026    * is indirect addressing of the temporary register space.  We need proper
5027    * array support so that we don't have to give up these passes in every
5028    * shader that uses arrays.
5029    */
5030   if (!v->indirect_addr_temps) {
5031      v->eliminate_dead_code();
5032      v->merge_registers();
5033      v->renumber_registers();
5034   }
5035
5036   /* Write the END instruction. */
5037   v->emit(NULL, TGSI_OPCODE_END);
5038
5039   if (ctx->Shader.Flags & GLSL_DUMP) {
5040      printf("\n");
5041      printf("GLSL IR for linked %s program %d:\n", target_string,
5042             shader_program->Name);
5043      _mesa_print_ir(shader->ir, NULL);
5044      printf("\n");
5045      printf("\n");
5046   }
5047
5048   prog->Instructions = NULL;
5049   prog->NumInstructions = 0;
5050
5051   do_set_program_inouts(shader->ir, prog);
5052   count_resources(v, prog);
5053
5054   check_resources(ctx, shader_program, v, prog);
5055
5056   _mesa_reference_program(ctx, &shader->Program, prog);
5057
5058   struct st_vertex_program *stvp;
5059   struct st_fragment_program *stfp;
5060   struct st_geometry_program *stgp;
5061
5062   switch (shader->Type) {
5063   case GL_VERTEX_SHADER:
5064      stvp = (struct st_vertex_program *)prog;
5065      stvp->glsl_to_tgsi = v;
5066      break;
5067   case GL_FRAGMENT_SHADER:
5068      stfp = (struct st_fragment_program *)prog;
5069      stfp->glsl_to_tgsi = v;
5070      break;
5071   case GL_GEOMETRY_SHADER:
5072      stgp = (struct st_geometry_program *)prog;
5073      stgp->glsl_to_tgsi = v;
5074      break;
5075   default:
5076      assert(!"should not be reached");
5077      return NULL;
5078   }
5079
5080   return prog;
5081}
5082
5083extern "C" {
5084
5085struct gl_shader *
5086st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
5087{
5088   struct gl_shader *shader;
5089   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
5090          type == GL_GEOMETRY_SHADER_ARB);
5091   shader = rzalloc(NULL, struct gl_shader);
5092   if (shader) {
5093      shader->Type = type;
5094      shader->Name = name;
5095      _mesa_init_shader(ctx, shader);
5096   }
5097   return shader;
5098}
5099
5100struct gl_shader_program *
5101st_new_shader_program(struct gl_context *ctx, GLuint name)
5102{
5103   struct gl_shader_program *shProg;
5104   shProg = rzalloc(NULL, struct gl_shader_program);
5105   if (shProg) {
5106      shProg->Name = name;
5107      _mesa_init_shader_program(ctx, shProg);
5108   }
5109   return shProg;
5110}
5111
5112/**
5113 * Link a shader.
5114 * Called via ctx->Driver.LinkShader()
5115 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
5116 * with code lowering and other optimizations.
5117 */
5118GLboolean
5119st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
5120{
5121   assert(prog->LinkStatus);
5122
5123   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5124      if (prog->_LinkedShaders[i] == NULL)
5125         continue;
5126
5127      bool progress;
5128      exec_list *ir = prog->_LinkedShaders[i]->ir;
5129      const struct gl_shader_compiler_options *options =
5130            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
5131
5132      do {
5133         progress = false;
5134
5135         /* Lowering */
5136         do_mat_op_to_vec(ir);
5137         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
5138				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
5139        			 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
5140
5141         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
5142
5143         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
5144
5145         progress = lower_quadop_vector(ir, false) || progress;
5146
5147         if (options->MaxIfDepth == 0)
5148            progress = lower_discard(ir) || progress;
5149
5150         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
5151
5152         if (options->EmitNoNoise)
5153            progress = lower_noise(ir) || progress;
5154
5155         /* If there are forms of indirect addressing that the driver
5156          * cannot handle, perform the lowering pass.
5157          */
5158         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
5159             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
5160           progress =
5161             lower_variable_index_to_cond_assign(ir,
5162        					 options->EmitNoIndirectInput,
5163        					 options->EmitNoIndirectOutput,
5164        					 options->EmitNoIndirectTemp,
5165        					 options->EmitNoIndirectUniform)
5166             || progress;
5167
5168         progress = do_vec_index_to_cond_assign(ir) || progress;
5169      } while (progress);
5170
5171      validate_ir_tree(ir);
5172   }
5173
5174   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5175      struct gl_program *linked_prog;
5176
5177      if (prog->_LinkedShaders[i] == NULL)
5178         continue;
5179
5180      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
5181
5182      if (linked_prog) {
5183         bool ok = true;
5184
5185         switch (prog->_LinkedShaders[i]->Type) {
5186         case GL_VERTEX_SHADER:
5187            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5188                                    linked_prog);
5189            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
5190                                                 linked_prog);
5191            if (!ok) {
5192               _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
5193            }
5194            break;
5195         case GL_FRAGMENT_SHADER:
5196            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
5197                                     (struct gl_fragment_program *)linked_prog);
5198            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
5199                                                 linked_prog);
5200            if (!ok) {
5201               _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
5202            }
5203            break;
5204         case GL_GEOMETRY_SHADER:
5205            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5206				    linked_prog);
5207            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
5208                                                 linked_prog);
5209            if (!ok) {
5210               _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
5211            }
5212            break;
5213         }
5214         if (!ok) {
5215            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL);
5216            _mesa_reference_program(ctx, &linked_prog, NULL);
5217            return GL_FALSE;
5218         }
5219      }
5220
5221      _mesa_reference_program(ctx, &linked_prog, NULL);
5222   }
5223
5224   return GL_TRUE;
5225}
5226
5227} /* extern "C" */
5228