1/*
2 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
3 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26/**
27 * \file ir_to_mesa.cpp
28 *
29 * Translate GLSL IR to Mesa's gl_program representation.
30 */
31
32#include <stdio.h>
33#include "main/compiler.h"
34#include "main/macros.h"
35#include "main/mtypes.h"
36#include "main/shaderapi.h"
37#include "main/shaderobj.h"
38#include "main/uniforms.h"
39#include "compiler/glsl/ast.h"
40#include "compiler/glsl/ir.h"
41#include "compiler/glsl/ir_expression_flattening.h"
42#include "compiler/glsl/ir_visitor.h"
43#include "compiler/glsl/ir_optimization.h"
44#include "compiler/glsl/ir_uniform.h"
45#include "compiler/glsl/glsl_parser_extras.h"
46#include "compiler/glsl_types.h"
47#include "compiler/glsl/linker.h"
48#include "compiler/glsl/program.h"
49#include "program/prog_instruction.h"
50#include "program/prog_optimize.h"
51#include "program/prog_print.h"
52#include "program/program.h"
53#include "program/prog_parameter.h"
54#include "util/string_to_uint_map.h"
55
56
57static int swizzle_for_size(int size);
58
59namespace {
60
61class src_reg;
62class dst_reg;
63
64/**
65 * This struct is a corresponding struct to Mesa prog_src_register, with
66 * wider fields.
67 */
68class src_reg {
69public:
70   src_reg(gl_register_file file, int index, const glsl_type *type)
71   {
72      this->file = file;
73      this->index = index;
74      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
75	 this->swizzle = swizzle_for_size(type->vector_elements);
76      else
77	 this->swizzle = SWIZZLE_XYZW;
78      this->negate = 0;
79      this->reladdr = NULL;
80   }
81
82   src_reg()
83   {
84      this->file = PROGRAM_UNDEFINED;
85      this->index = 0;
86      this->swizzle = 0;
87      this->negate = 0;
88      this->reladdr = NULL;
89   }
90
91   explicit src_reg(dst_reg reg);
92
93   gl_register_file file; /**< PROGRAM_* from Mesa */
94   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
95   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
96   int negate; /**< NEGATE_XYZW mask from mesa */
97   /** Register index should be offset by the integer in this reg. */
98   src_reg *reladdr;
99};
100
101class dst_reg {
102public:
103   dst_reg(gl_register_file file, int writemask)
104   {
105      this->file = file;
106      this->index = 0;
107      this->writemask = writemask;
108      this->reladdr = NULL;
109   }
110
111   dst_reg()
112   {
113      this->file = PROGRAM_UNDEFINED;
114      this->index = 0;
115      this->writemask = 0;
116      this->reladdr = NULL;
117   }
118
119   explicit dst_reg(src_reg reg);
120
121   gl_register_file file; /**< PROGRAM_* from Mesa */
122   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
123   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
124   /** Register index should be offset by the integer in this reg. */
125   src_reg *reladdr;
126};
127
128} /* anonymous namespace */
129
130src_reg::src_reg(dst_reg reg)
131{
132   this->file = reg.file;
133   this->index = reg.index;
134   this->swizzle = SWIZZLE_XYZW;
135   this->negate = 0;
136   this->reladdr = reg.reladdr;
137}
138
139dst_reg::dst_reg(src_reg reg)
140{
141   this->file = reg.file;
142   this->index = reg.index;
143   this->writemask = WRITEMASK_XYZW;
144   this->reladdr = reg.reladdr;
145}
146
147namespace {
148
149class ir_to_mesa_instruction : public exec_node {
150public:
151   DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction)
152
153   enum prog_opcode op;
154   dst_reg dst;
155   src_reg src[3];
156   /** Pointer to the ir source this tree came from for debugging */
157   ir_instruction *ir;
158   bool saturate;
159   int sampler; /**< sampler index */
160   int tex_target; /**< One of TEXTURE_*_INDEX */
161   GLboolean tex_shadow;
162};
163
164class variable_storage : public exec_node {
165public:
166   variable_storage(ir_variable *var, gl_register_file file, int index)
167      : file(file), index(index), var(var)
168   {
169      /* empty */
170   }
171
172   gl_register_file file;
173   int index;
174   ir_variable *var; /* variable that maps to this, if any */
175};
176
177class function_entry : public exec_node {
178public:
179   ir_function_signature *sig;
180
181   /**
182    * identifier of this function signature used by the program.
183    *
184    * At the point that Mesa instructions for function calls are
185    * generated, we don't know the address of the first instruction of
186    * the function body.  So we make the BranchTarget that is called a
187    * small integer and rewrite them during set_branchtargets().
188    */
189   int sig_id;
190
191   /**
192    * Pointer to first instruction of the function body.
193    *
194    * Set during function body emits after main() is processed.
195    */
196   ir_to_mesa_instruction *bgn_inst;
197
198   /**
199    * Index of the first instruction of the function body in actual
200    * Mesa IR.
201    *
202    * Set after convertion from ir_to_mesa_instruction to prog_instruction.
203    */
204   int inst;
205
206   /** Storage for the return value. */
207   src_reg return_reg;
208};
209
210class ir_to_mesa_visitor : public ir_visitor {
211public:
212   ir_to_mesa_visitor();
213   ~ir_to_mesa_visitor();
214
215   function_entry *current_function;
216
217   struct gl_context *ctx;
218   struct gl_program *prog;
219   struct gl_shader_program *shader_program;
220   struct gl_shader_compiler_options *options;
221
222   int next_temp;
223
224   variable_storage *find_variable_storage(const ir_variable *var);
225
226   src_reg get_temp(const glsl_type *type);
227   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
228
229   src_reg src_reg_for_float(float val);
230
231   /**
232    * \name Visit methods
233    *
234    * As typical for the visitor pattern, there must be one \c visit method for
235    * each concrete subclass of \c ir_instruction.  Virtual base classes within
236    * the hierarchy should not have \c visit methods.
237    */
238   /*@{*/
239   virtual void visit(ir_variable *);
240   virtual void visit(ir_loop *);
241   virtual void visit(ir_loop_jump *);
242   virtual void visit(ir_function_signature *);
243   virtual void visit(ir_function *);
244   virtual void visit(ir_expression *);
245   virtual void visit(ir_swizzle *);
246   virtual void visit(ir_dereference_variable  *);
247   virtual void visit(ir_dereference_array *);
248   virtual void visit(ir_dereference_record *);
249   virtual void visit(ir_assignment *);
250   virtual void visit(ir_constant *);
251   virtual void visit(ir_call *);
252   virtual void visit(ir_return *);
253   virtual void visit(ir_discard *);
254   virtual void visit(ir_texture *);
255   virtual void visit(ir_if *);
256   virtual void visit(ir_emit_vertex *);
257   virtual void visit(ir_end_primitive *);
258   virtual void visit(ir_barrier *);
259   /*@}*/
260
261   src_reg result;
262
263   /** List of variable_storage */
264   exec_list variables;
265
266   /** List of function_entry */
267   exec_list function_signatures;
268   int next_signature_id;
269
270   /** List of ir_to_mesa_instruction */
271   exec_list instructions;
272
273   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);
274
275   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
276			        dst_reg dst, src_reg src0);
277
278   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
279			        dst_reg dst, src_reg src0, src_reg src1);
280
281   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
282			        dst_reg dst,
283			        src_reg src0, src_reg src1, src_reg src2);
284
285   /**
286    * Emit the correct dot-product instruction for the type of arguments
287    */
288   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
289				    dst_reg dst,
290				    src_reg src0,
291				    src_reg src1,
292				    unsigned elements);
293
294   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
295		    dst_reg dst, src_reg src0);
296
297   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
298		    dst_reg dst, src_reg src0, src_reg src1);
299
300   bool try_emit_mad(ir_expression *ir,
301			  int mul_operand);
302   bool try_emit_mad_for_and_not(ir_expression *ir,
303				 int mul_operand);
304
305   void emit_swz(ir_expression *ir);
306
307   void emit_equality_comparison(ir_expression *ir, enum prog_opcode op,
308                                 dst_reg dst,
309                                 const src_reg &src0, const src_reg &src1);
310
311   inline void emit_sne(ir_expression *ir, dst_reg dst,
312                        const src_reg &src0, const src_reg &src1)
313   {
314      emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1);
315   }
316
317   inline void emit_seq(ir_expression *ir, dst_reg dst,
318                        const src_reg &src0, const src_reg &src1)
319   {
320      emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1);
321   }
322
323   bool process_move_condition(ir_rvalue *ir);
324
325   void copy_propagate(void);
326
327   void *mem_ctx;
328};
329
330} /* anonymous namespace */
331
332static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);
333
334static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
335
336static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
337
338static int
339swizzle_for_size(int size)
340{
341   static const int size_swizzles[4] = {
342      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
343      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
344      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
345      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
346   };
347
348   assert((size >= 1) && (size <= 4));
349   return size_swizzles[size - 1];
350}
351
352ir_to_mesa_instruction *
353ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
354			 dst_reg dst,
355			 src_reg src0, src_reg src1, src_reg src2)
356{
357   ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
358   int num_reladdr = 0;
359
360   /* If we have to do relative addressing, we want to load the ARL
361    * reg directly for one of the regs, and preload the other reladdr
362    * sources into temps.
363    */
364   num_reladdr += dst.reladdr != NULL;
365   num_reladdr += src0.reladdr != NULL;
366   num_reladdr += src1.reladdr != NULL;
367   num_reladdr += src2.reladdr != NULL;
368
369   reladdr_to_temp(ir, &src2, &num_reladdr);
370   reladdr_to_temp(ir, &src1, &num_reladdr);
371   reladdr_to_temp(ir, &src0, &num_reladdr);
372
373   if (dst.reladdr) {
374      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
375      num_reladdr--;
376   }
377   assert(num_reladdr == 0);
378
379   inst->op = op;
380   inst->dst = dst;
381   inst->src[0] = src0;
382   inst->src[1] = src1;
383   inst->src[2] = src2;
384   inst->ir = ir;
385
386   this->instructions.push_tail(inst);
387
388   return inst;
389}
390
391
392ir_to_mesa_instruction *
393ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
394			 dst_reg dst, src_reg src0, src_reg src1)
395{
396   return emit(ir, op, dst, src0, src1, undef_src);
397}
398
399ir_to_mesa_instruction *
400ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
401			 dst_reg dst, src_reg src0)
402{
403   assert(dst.writemask != 0);
404   return emit(ir, op, dst, src0, undef_src, undef_src);
405}
406
407ir_to_mesa_instruction *
408ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
409{
410   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
411}
412
413ir_to_mesa_instruction *
414ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
415			    dst_reg dst, src_reg src0, src_reg src1,
416			    unsigned elements)
417{
418   static const enum prog_opcode dot_opcodes[] = {
419      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
420   };
421
422   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
423}
424
425/**
426 * Emits Mesa scalar opcodes to produce unique answers across channels.
427 *
428 * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
429 * channel determines the result across all channels.  So to do a vec4
430 * of this operation, we want to emit a scalar per source channel used
431 * to produce dest channels.
432 */
433void
434ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
435			        dst_reg dst,
436				src_reg orig_src0, src_reg orig_src1)
437{
438   int i, j;
439   int done_mask = ~dst.writemask;
440
441   /* Mesa RCP is a scalar operation splatting results to all channels,
442    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
443    * dst channels.
444    */
445   for (i = 0; i < 4; i++) {
446      GLuint this_mask = (1 << i);
447      ir_to_mesa_instruction *inst;
448      src_reg src0 = orig_src0;
449      src_reg src1 = orig_src1;
450
451      if (done_mask & this_mask)
452	 continue;
453
454      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
455      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
456      for (j = i + 1; j < 4; j++) {
457	 /* If there is another enabled component in the destination that is
458	  * derived from the same inputs, generate its value on this pass as
459	  * well.
460	  */
461	 if (!(done_mask & (1 << j)) &&
462	     GET_SWZ(src0.swizzle, j) == src0_swiz &&
463	     GET_SWZ(src1.swizzle, j) == src1_swiz) {
464	    this_mask |= (1 << j);
465	 }
466      }
467      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
468				   src0_swiz, src0_swiz);
469      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
470				  src1_swiz, src1_swiz);
471
472      inst = emit(ir, op, dst, src0, src1);
473      inst->dst.writemask = this_mask;
474      done_mask |= this_mask;
475   }
476}
477
478void
479ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
480			        dst_reg dst, src_reg src0)
481{
482   src_reg undef = undef_src;
483
484   undef.swizzle = SWIZZLE_XXXX;
485
486   emit_scalar(ir, op, dst, src0, undef);
487}
488
489src_reg
490ir_to_mesa_visitor::src_reg_for_float(float val)
491{
492   src_reg src(PROGRAM_CONSTANT, -1, NULL);
493
494   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
495					  (const gl_constant_value *)&val, 1, &src.swizzle);
496
497   return src;
498}
499
500static int
501type_size(const struct glsl_type *type)
502{
503   unsigned int i;
504   int size;
505
506   switch (type->base_type) {
507   case GLSL_TYPE_UINT:
508   case GLSL_TYPE_INT:
509   case GLSL_TYPE_FLOAT:
510   case GLSL_TYPE_BOOL:
511      if (type->is_matrix()) {
512	 return type->matrix_columns;
513      } else {
514	 /* Regardless of size of vector, it gets a vec4. This is bad
515	  * packing for things like floats, but otherwise arrays become a
516	  * mess.  Hopefully a later pass over the code can pack scalars
517	  * down if appropriate.
518	  */
519	 return 1;
520      }
521      break;
522   case GLSL_TYPE_DOUBLE:
523      if (type->is_matrix()) {
524         if (type->vector_elements > 2)
525            return type->matrix_columns * 2;
526         else
527            return type->matrix_columns;
528      } else {
529         if (type->vector_elements > 2)
530            return 2;
531         else
532            return 1;
533      }
534      break;
535   case GLSL_TYPE_ARRAY:
536      assert(type->length > 0);
537      return type_size(type->fields.array) * type->length;
538   case GLSL_TYPE_STRUCT:
539      size = 0;
540      for (i = 0; i < type->length; i++) {
541	 size += type_size(type->fields.structure[i].type);
542      }
543      return size;
544   case GLSL_TYPE_SAMPLER:
545   case GLSL_TYPE_IMAGE:
546   case GLSL_TYPE_SUBROUTINE:
547      /* Samplers take up one slot in UNIFORMS[], but they're baked in
548       * at link time.
549       */
550      return 1;
551   case GLSL_TYPE_ATOMIC_UINT:
552   case GLSL_TYPE_VOID:
553   case GLSL_TYPE_ERROR:
554   case GLSL_TYPE_INTERFACE:
555   case GLSL_TYPE_FUNCTION:
556      assert(!"Invalid type in type_size");
557      break;
558   }
559
560   return 0;
561}
562
563/**
564 * In the initial pass of codegen, we assign temporary numbers to
565 * intermediate results.  (not SSA -- variable assignments will reuse
566 * storage).  Actual register allocation for the Mesa VM occurs in a
567 * pass over the Mesa IR later.
568 */
569src_reg
570ir_to_mesa_visitor::get_temp(const glsl_type *type)
571{
572   src_reg src;
573
574   src.file = PROGRAM_TEMPORARY;
575   src.index = next_temp;
576   src.reladdr = NULL;
577   next_temp += type_size(type);
578
579   if (type->is_array() || type->is_record()) {
580      src.swizzle = SWIZZLE_NOOP;
581   } else {
582      src.swizzle = swizzle_for_size(type->vector_elements);
583   }
584   src.negate = 0;
585
586   return src;
587}
588
589variable_storage *
590ir_to_mesa_visitor::find_variable_storage(const ir_variable *var)
591{
592   foreach_in_list(variable_storage, entry, &this->variables) {
593      if (entry->var == var)
594	 return entry;
595   }
596
597   return NULL;
598}
599
600void
601ir_to_mesa_visitor::visit(ir_variable *ir)
602{
603   if (strcmp(ir->name, "gl_FragCoord") == 0) {
604      this->prog->OriginUpperLeft = ir->data.origin_upper_left;
605      this->prog->PixelCenterInteger = ir->data.pixel_center_integer;
606   }
607
608   if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
609      unsigned int i;
610      const ir_state_slot *const slots = ir->get_state_slots();
611      assert(slots != NULL);
612
613      /* Check if this statevar's setup in the STATE file exactly
614       * matches how we'll want to reference it as a
615       * struct/array/whatever.  If not, then we need to move it into
616       * temporary storage and hope that it'll get copy-propagated
617       * out.
618       */
619      for (i = 0; i < ir->get_num_state_slots(); i++) {
620	 if (slots[i].swizzle != SWIZZLE_XYZW) {
621	    break;
622	 }
623      }
624
625      variable_storage *storage;
626      dst_reg dst;
627      if (i == ir->get_num_state_slots()) {
628	 /* We'll set the index later. */
629	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
630	 this->variables.push_tail(storage);
631
632	 dst = undef_dst;
633      } else {
634	 /* The variable_storage constructor allocates slots based on the size
635	  * of the type.  However, this had better match the number of state
636	  * elements that we're going to copy into the new temporary.
637	  */
638	 assert((int) ir->get_num_state_slots() == type_size(ir->type));
639
640	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
641						 this->next_temp);
642	 this->variables.push_tail(storage);
643	 this->next_temp += type_size(ir->type);
644
645	 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
646      }
647
648
649      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
650	 int index = _mesa_add_state_reference(this->prog->Parameters,
651					       (gl_state_index *)slots[i].tokens);
652
653	 if (storage->file == PROGRAM_STATE_VAR) {
654	    if (storage->index == -1) {
655	       storage->index = index;
656	    } else {
657	       assert(index == storage->index + (int)i);
658	    }
659	 } else {
660	    src_reg src(PROGRAM_STATE_VAR, index, NULL);
661	    src.swizzle = slots[i].swizzle;
662	    emit(ir, OPCODE_MOV, dst, src);
663	    /* even a float takes up a whole vec4 reg in a struct/array. */
664	    dst.index++;
665	 }
666      }
667
668      if (storage->file == PROGRAM_TEMPORARY &&
669	  dst.index != storage->index + (int) ir->get_num_state_slots()) {
670	 linker_error(this->shader_program,
671		      "failed to load builtin uniform `%s' "
672		      "(%d/%d regs loaded)\n",
673		      ir->name, dst.index - storage->index,
674		      type_size(ir->type));
675      }
676   }
677}
678
679void
680ir_to_mesa_visitor::visit(ir_loop *ir)
681{
682   emit(NULL, OPCODE_BGNLOOP);
683
684   visit_exec_list(&ir->body_instructions, this);
685
686   emit(NULL, OPCODE_ENDLOOP);
687}
688
689void
690ir_to_mesa_visitor::visit(ir_loop_jump *ir)
691{
692   switch (ir->mode) {
693   case ir_loop_jump::jump_break:
694      emit(NULL, OPCODE_BRK);
695      break;
696   case ir_loop_jump::jump_continue:
697      emit(NULL, OPCODE_CONT);
698      break;
699   }
700}
701
702
703void
704ir_to_mesa_visitor::visit(ir_function_signature *ir)
705{
706   assert(0);
707   (void)ir;
708}
709
710void
711ir_to_mesa_visitor::visit(ir_function *ir)
712{
713   /* Ignore function bodies other than main() -- we shouldn't see calls to
714    * them since they should all be inlined before we get to ir_to_mesa.
715    */
716   if (strcmp(ir->name, "main") == 0) {
717      const ir_function_signature *sig;
718      exec_list empty;
719
720      sig = ir->matching_signature(NULL, &empty, false);
721
722      assert(sig);
723
724      foreach_in_list(ir_instruction, ir, &sig->body) {
725	 ir->accept(this);
726      }
727   }
728}
729
730bool
731ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
732{
733   int nonmul_operand = 1 - mul_operand;
734   src_reg a, b, c;
735
736   ir_expression *expr = ir->operands[mul_operand]->as_expression();
737   if (!expr || expr->operation != ir_binop_mul)
738      return false;
739
740   expr->operands[0]->accept(this);
741   a = this->result;
742   expr->operands[1]->accept(this);
743   b = this->result;
744   ir->operands[nonmul_operand]->accept(this);
745   c = this->result;
746
747   this->result = get_temp(ir->type);
748   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
749
750   return true;
751}
752
753/**
754 * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
755 *
756 * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
757 * implemented using multiplication, and logical-or is implemented using
758 * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
759 * As result, the logical expression (a & !b) can be rewritten as:
760 *
761 *     - a * !b
762 *     - a * (1 - b)
763 *     - (a * 1) - (a * b)
764 *     - a + -(a * b)
765 *     - a + (a * -b)
766 *
767 * This final expression can be implemented as a single MAD(a, -b, a)
768 * instruction.
769 */
770bool
771ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
772{
773   const int other_operand = 1 - try_operand;
774   src_reg a, b;
775
776   ir_expression *expr = ir->operands[try_operand]->as_expression();
777   if (!expr || expr->operation != ir_unop_logic_not)
778      return false;
779
780   ir->operands[other_operand]->accept(this);
781   a = this->result;
782   expr->operands[0]->accept(this);
783   b = this->result;
784
785   b.negate = ~b.negate;
786
787   this->result = get_temp(ir->type);
788   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
789
790   return true;
791}
792
793void
794ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
795				    src_reg *reg, int *num_reladdr)
796{
797   if (!reg->reladdr)
798      return;
799
800   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
801
802   if (*num_reladdr != 1) {
803      src_reg temp = get_temp(glsl_type::vec4_type);
804
805      emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
806      *reg = temp;
807   }
808
809   (*num_reladdr)--;
810}
811
812void
813ir_to_mesa_visitor::emit_swz(ir_expression *ir)
814{
815   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
816    * This means that each of the operands is either an immediate value of -1,
817    * 0, or 1, or is a component from one source register (possibly with
818    * negation).
819    */
820   uint8_t components[4] = { 0 };
821   bool negate[4] = { false };
822   ir_variable *var = NULL;
823
824   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
825      ir_rvalue *op = ir->operands[i];
826
827      assert(op->type->is_scalar());
828
829      while (op != NULL) {
830	 switch (op->ir_type) {
831	 case ir_type_constant: {
832
833	    assert(op->type->is_scalar());
834
835	    const ir_constant *const c = op->as_constant();
836	    if (c->is_one()) {
837	       components[i] = SWIZZLE_ONE;
838	    } else if (c->is_zero()) {
839	       components[i] = SWIZZLE_ZERO;
840	    } else if (c->is_negative_one()) {
841	       components[i] = SWIZZLE_ONE;
842	       negate[i] = true;
843	    } else {
844	       assert(!"SWZ constant must be 0.0 or 1.0.");
845	    }
846
847	    op = NULL;
848	    break;
849	 }
850
851	 case ir_type_dereference_variable: {
852	    ir_dereference_variable *const deref =
853	       (ir_dereference_variable *) op;
854
855	    assert((var == NULL) || (deref->var == var));
856	    components[i] = SWIZZLE_X;
857	    var = deref->var;
858	    op = NULL;
859	    break;
860	 }
861
862	 case ir_type_expression: {
863	    ir_expression *const expr = (ir_expression *) op;
864
865	    assert(expr->operation == ir_unop_neg);
866	    negate[i] = true;
867
868	    op = expr->operands[0];
869	    break;
870	 }
871
872	 case ir_type_swizzle: {
873	    ir_swizzle *const swiz = (ir_swizzle *) op;
874
875	    components[i] = swiz->mask.x;
876	    op = swiz->val;
877	    break;
878	 }
879
880	 default:
881	    assert(!"Should not get here.");
882	    return;
883	 }
884      }
885   }
886
887   assert(var != NULL);
888
889   ir_dereference_variable *const deref =
890      new(mem_ctx) ir_dereference_variable(var);
891
892   this->result.file = PROGRAM_UNDEFINED;
893   deref->accept(this);
894   if (this->result.file == PROGRAM_UNDEFINED) {
895      printf("Failed to get tree for expression operand:\n");
896      deref->print();
897      printf("\n");
898      exit(1);
899   }
900
901   src_reg src;
902
903   src = this->result;
904   src.swizzle = MAKE_SWIZZLE4(components[0],
905			       components[1],
906			       components[2],
907			       components[3]);
908   src.negate = ((unsigned(negate[0]) << 0)
909		 | (unsigned(negate[1]) << 1)
910		 | (unsigned(negate[2]) << 2)
911		 | (unsigned(negate[3]) << 3));
912
913   /* Storage for our result.  Ideally for an assignment we'd be using the
914    * actual storage for the result here, instead.
915    */
916   const src_reg result_src = get_temp(ir->type);
917   dst_reg result_dst = dst_reg(result_src);
918
919   /* Limit writes to the channels that will be used by result_src later.
920    * This does limit this temp's use as a temporary for multi-instruction
921    * sequences.
922    */
923   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
924
925   emit(ir, OPCODE_SWZ, result_dst, src);
926   this->result = result_src;
927}
928
929void
930ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir,
931                                             enum prog_opcode op,
932                                             dst_reg dst,
933                                             const src_reg &src0,
934                                             const src_reg &src1)
935{
936   src_reg difference;
937   src_reg abs_difference = get_temp(glsl_type::vec4_type);
938   const src_reg zero = src_reg_for_float(0.0);
939
940   /* x == y is equivalent to -abs(x-y) >= 0.  Since all of the code that
941    * consumes the generated IR is pretty dumb, take special care when one
942    * of the operands is zero.
943    *
944    * Similarly, x != y is equivalent to -abs(x-y) < 0.
945    */
946   if (src0.file == zero.file &&
947       src0.index == zero.index &&
948       src0.swizzle == zero.swizzle) {
949      difference = src1;
950   } else if (src1.file == zero.file &&
951              src1.index == zero.index &&
952              src1.swizzle == zero.swizzle) {
953      difference = src0;
954   } else {
955      difference = get_temp(glsl_type::vec4_type);
956
957      src_reg tmp_src = src0;
958      tmp_src.negate = ~tmp_src.negate;
959
960      emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1);
961   }
962
963   emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference);
964
965   abs_difference.negate = ~abs_difference.negate;
966   emit(ir, op, dst, abs_difference, zero);
967}
968
969void
970ir_to_mesa_visitor::visit(ir_expression *ir)
971{
972   unsigned int operand;
973   src_reg op[ARRAY_SIZE(ir->operands)];
974   src_reg result_src;
975   dst_reg result_dst;
976
977   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
978    */
979   if (ir->operation == ir_binop_add) {
980      if (try_emit_mad(ir, 1))
981	 return;
982      if (try_emit_mad(ir, 0))
983	 return;
984   }
985
986   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
987    */
988   if (ir->operation == ir_binop_logic_and) {
989      if (try_emit_mad_for_and_not(ir, 1))
990	 return;
991      if (try_emit_mad_for_and_not(ir, 0))
992	 return;
993   }
994
995   if (ir->operation == ir_quadop_vector) {
996      this->emit_swz(ir);
997      return;
998   }
999
1000   for (operand = 0; operand < ir->get_num_operands(); operand++) {
1001      this->result.file = PROGRAM_UNDEFINED;
1002      ir->operands[operand]->accept(this);
1003      if (this->result.file == PROGRAM_UNDEFINED) {
1004	 printf("Failed to get tree for expression operand:\n");
1005         ir->operands[operand]->print();
1006         printf("\n");
1007	 exit(1);
1008      }
1009      op[operand] = this->result;
1010
1011      /* Matrix expression operands should have been broken down to vector
1012       * operations already.
1013       */
1014      assert(!ir->operands[operand]->type->is_matrix());
1015   }
1016
1017   int vector_elements = ir->operands[0]->type->vector_elements;
1018   if (ir->operands[1]) {
1019      vector_elements = MAX2(vector_elements,
1020			     ir->operands[1]->type->vector_elements);
1021   }
1022
1023   this->result.file = PROGRAM_UNDEFINED;
1024
1025   /* Storage for our result.  Ideally for an assignment we'd be using
1026    * the actual storage for the result here, instead.
1027    */
1028   result_src = get_temp(ir->type);
1029   /* convenience for the emit functions below. */
1030   result_dst = dst_reg(result_src);
1031   /* Limit writes to the channels that will be used by result_src later.
1032    * This does limit this temp's use as a temporary for multi-instruction
1033    * sequences.
1034    */
1035   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1036
1037   switch (ir->operation) {
1038   case ir_unop_logic_not:
1039      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1040       * older GPUs implement SEQ using multiple instructions (i915 uses two
1041       * SGE instructions and a MUL instruction).  Since our logic values are
1042       * 0.0 and 1.0, 1-x also implements !x.
1043       */
1044      op[0].negate = ~op[0].negate;
1045      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
1046      break;
1047   case ir_unop_neg:
1048      op[0].negate = ~op[0].negate;
1049      result_src = op[0];
1050      break;
1051   case ir_unop_abs:
1052      emit(ir, OPCODE_ABS, result_dst, op[0]);
1053      break;
1054   case ir_unop_sign:
1055      emit(ir, OPCODE_SSG, result_dst, op[0]);
1056      break;
1057   case ir_unop_rcp:
1058      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
1059      break;
1060
1061   case ir_unop_exp2:
1062      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
1063      break;
1064   case ir_unop_exp:
1065   case ir_unop_log:
1066      assert(!"not reached: should be handled by ir_explog_to_explog2");
1067      break;
1068   case ir_unop_log2:
1069      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
1070      break;
1071   case ir_unop_sin:
1072      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
1073      break;
1074   case ir_unop_cos:
1075      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
1076      break;
1077
1078   case ir_unop_dFdx:
1079      emit(ir, OPCODE_DDX, result_dst, op[0]);
1080      break;
1081   case ir_unop_dFdy:
1082      emit(ir, OPCODE_DDY, result_dst, op[0]);
1083      break;
1084
1085   case ir_unop_saturate: {
1086      ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
1087                                          result_dst, op[0]);
1088      inst->saturate = true;
1089      break;
1090   }
1091   case ir_unop_noise: {
1092      const enum prog_opcode opcode =
1093	 prog_opcode(OPCODE_NOISE1
1094		     + (ir->operands[0]->type->vector_elements) - 1);
1095      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
1096
1097      emit(ir, opcode, result_dst, op[0]);
1098      break;
1099   }
1100
1101   case ir_binop_add:
1102      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1103      break;
1104   case ir_binop_sub:
1105      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
1106      break;
1107
1108   case ir_binop_mul:
1109      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1110      break;
1111   case ir_binop_div:
1112      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1113      break;
1114   case ir_binop_mod:
1115      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
1116      assert(ir->type->is_integer());
1117      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1118      break;
1119
1120   case ir_binop_less:
1121      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
1122      break;
1123   case ir_binop_greater:
1124      /* Negating the operands (as opposed to switching the order of the
1125       * operands) produces the correct result when both are +/-Inf.
1126       */
1127      op[0].negate = ~op[0].negate;
1128      op[1].negate = ~op[1].negate;
1129      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
1130      break;
1131   case ir_binop_lequal:
1132      /* Negating the operands (as opposed to switching the order of the
1133       * operands) produces the correct result when both are +/-Inf.
1134       */
1135      op[0].negate = ~op[0].negate;
1136      op[1].negate = ~op[1].negate;
1137      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
1138      break;
1139   case ir_binop_gequal:
1140      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
1141      break;
1142   case ir_binop_equal:
1143      emit_seq(ir, result_dst, op[0], op[1]);
1144      break;
1145   case ir_binop_nequal:
1146      emit_sne(ir, result_dst, op[0], op[1]);
1147      break;
1148   case ir_binop_all_equal:
1149      /* "==" operator producing a scalar boolean. */
1150      if (ir->operands[0]->type->is_vector() ||
1151	  ir->operands[1]->type->is_vector()) {
1152	 src_reg temp = get_temp(glsl_type::vec4_type);
1153         emit_sne(ir, dst_reg(temp), op[0], op[1]);
1154
1155	 /* After the dot-product, the value will be an integer on the
1156	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
1157	  */
1158	 emit_dp(ir, result_dst, temp, temp, vector_elements);
1159
1160	 /* Negating the result of the dot-product gives values on the range
1161	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
1162	  * achieved using SGE.
1163	  */
1164	 src_reg sge_src = result_src;
1165	 sge_src.negate = ~sge_src.negate;
1166	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
1167      } else {
1168         emit_seq(ir, result_dst, op[0], op[1]);
1169      }
1170      break;
1171   case ir_binop_any_nequal:
1172      /* "!=" operator producing a scalar boolean. */
1173      if (ir->operands[0]->type->is_vector() ||
1174	  ir->operands[1]->type->is_vector()) {
1175	 src_reg temp = get_temp(glsl_type::vec4_type);
1176         if (ir->operands[0]->type->is_boolean() &&
1177             ir->operands[1]->as_constant() &&
1178             ir->operands[1]->as_constant()->is_zero()) {
1179            temp = op[0];
1180         } else {
1181            emit_sne(ir, dst_reg(temp), op[0], op[1]);
1182         }
1183
1184	 /* After the dot-product, the value will be an integer on the
1185	  * range [0,4].  Zero stays zero, and positive values become 1.0.
1186	  */
1187	 ir_to_mesa_instruction *const dp =
1188	    emit_dp(ir, result_dst, temp, temp, vector_elements);
1189	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1190	    /* The clamping to [0,1] can be done for free in the fragment
1191	     * shader with a saturate.
1192	     */
1193	    dp->saturate = true;
1194	 } else {
1195	    /* Negating the result of the dot-product gives values on the range
1196	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1197	     * achieved using SLT.
1198	     */
1199	    src_reg slt_src = result_src;
1200	    slt_src.negate = ~slt_src.negate;
1201	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1202	 }
1203      } else {
1204         emit_sne(ir, result_dst, op[0], op[1]);
1205      }
1206      break;
1207
1208   case ir_binop_logic_xor:
1209      emit_sne(ir, result_dst, op[0], op[1]);
1210      break;
1211
1212   case ir_binop_logic_or: {
1213      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1214         /* After the addition, the value will be an integer on the
1215          * range [0,2].  Zero stays zero, and positive values become 1.0.
1216          */
1217         ir_to_mesa_instruction *add =
1218            emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1219	 add->saturate = true;
1220      } else {
1221         /* The Boolean arguments are stored as float 0.0 and 1.0.  If either
1222          * value is 1.0, the result of the logcal-or should be 1.0.  If both
1223          * values are 0.0, the result should be 0.0.  This is exactly what
1224          * MAX does.
1225          */
1226         emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1227      }
1228      break;
1229   }
1230
1231   case ir_binop_logic_and:
1232      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1233      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1234      break;
1235
1236   case ir_binop_dot:
1237      assert(ir->operands[0]->type->is_vector());
1238      assert(ir->operands[0]->type == ir->operands[1]->type);
1239      emit_dp(ir, result_dst, op[0], op[1],
1240	      ir->operands[0]->type->vector_elements);
1241      break;
1242
1243   case ir_unop_sqrt:
1244      /* sqrt(x) = x * rsq(x). */
1245      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1246      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
1247      /* For incoming channels <= 0, set the result to 0. */
1248      op[0].negate = ~op[0].negate;
1249      emit(ir, OPCODE_CMP, result_dst,
1250			  op[0], result_src, src_reg_for_float(0.0));
1251      break;
1252   case ir_unop_rsq:
1253      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1254      break;
1255   case ir_unop_i2f:
1256   case ir_unop_u2f:
1257   case ir_unop_b2f:
1258   case ir_unop_b2i:
1259   case ir_unop_i2u:
1260   case ir_unop_u2i:
1261      /* Mesa IR lacks types, ints are stored as truncated floats. */
1262      result_src = op[0];
1263      break;
1264   case ir_unop_f2i:
1265   case ir_unop_f2u:
1266      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1267      break;
1268   case ir_unop_f2b:
1269   case ir_unop_i2b:
1270      emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0));
1271      break;
1272   case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
1273   case ir_unop_bitcast_f2u:
1274   case ir_unop_bitcast_i2f:
1275   case ir_unop_bitcast_u2f:
1276      break;
1277   case ir_unop_trunc:
1278      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1279      break;
1280   case ir_unop_ceil:
1281      op[0].negate = ~op[0].negate;
1282      emit(ir, OPCODE_FLR, result_dst, op[0]);
1283      result_src.negate = ~result_src.negate;
1284      break;
1285   case ir_unop_floor:
1286      emit(ir, OPCODE_FLR, result_dst, op[0]);
1287      break;
1288   case ir_unop_fract:
1289      emit(ir, OPCODE_FRC, result_dst, op[0]);
1290      break;
1291   case ir_unop_pack_snorm_2x16:
1292   case ir_unop_pack_snorm_4x8:
1293   case ir_unop_pack_unorm_2x16:
1294   case ir_unop_pack_unorm_4x8:
1295   case ir_unop_pack_half_2x16:
1296   case ir_unop_pack_double_2x32:
1297   case ir_unop_unpack_snorm_2x16:
1298   case ir_unop_unpack_snorm_4x8:
1299   case ir_unop_unpack_unorm_2x16:
1300   case ir_unop_unpack_unorm_4x8:
1301   case ir_unop_unpack_half_2x16:
1302   case ir_unop_unpack_double_2x32:
1303   case ir_unop_bitfield_reverse:
1304   case ir_unop_bit_count:
1305   case ir_unop_find_msb:
1306   case ir_unop_find_lsb:
1307   case ir_unop_d2f:
1308   case ir_unop_f2d:
1309   case ir_unop_d2i:
1310   case ir_unop_i2d:
1311   case ir_unop_d2u:
1312   case ir_unop_u2d:
1313   case ir_unop_d2b:
1314   case ir_unop_frexp_sig:
1315   case ir_unop_frexp_exp:
1316      assert(!"not supported");
1317      break;
1318   case ir_binop_min:
1319      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
1320      break;
1321   case ir_binop_max:
1322      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1323      break;
1324   case ir_binop_pow:
1325      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
1326      break;
1327
1328      /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
1329       * hardware backends have no way to avoid Mesa IR generation
1330       * even if they don't use it, we need to emit "something" and
1331       * continue.
1332       */
1333   case ir_binop_lshift:
1334   case ir_binop_rshift:
1335   case ir_binop_bit_and:
1336   case ir_binop_bit_xor:
1337   case ir_binop_bit_or:
1338      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1339      break;
1340
1341   case ir_unop_bit_not:
1342   case ir_unop_round_even:
1343      emit(ir, OPCODE_MOV, result_dst, op[0]);
1344      break;
1345
1346   case ir_binop_ubo_load:
1347      assert(!"not supported");
1348      break;
1349
1350   case ir_triop_lrp:
1351      /* ir_triop_lrp operands are (x, y, a) while
1352       * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
1353       */
1354      emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
1355      break;
1356
1357   case ir_binop_vector_extract:
1358   case ir_triop_fma:
1359   case ir_triop_bitfield_extract:
1360   case ir_triop_vector_insert:
1361   case ir_quadop_bitfield_insert:
1362   case ir_binop_ldexp:
1363   case ir_triop_csel:
1364   case ir_binop_carry:
1365   case ir_binop_borrow:
1366   case ir_binop_imul_high:
1367   case ir_unop_interpolate_at_centroid:
1368   case ir_binop_interpolate_at_offset:
1369   case ir_binop_interpolate_at_sample:
1370   case ir_unop_dFdx_coarse:
1371   case ir_unop_dFdx_fine:
1372   case ir_unop_dFdy_coarse:
1373   case ir_unop_dFdy_fine:
1374   case ir_unop_subroutine_to_int:
1375   case ir_unop_get_buffer_size:
1376   case ir_unop_vote_any:
1377   case ir_unop_vote_all:
1378   case ir_unop_vote_eq:
1379      assert(!"not supported");
1380      break;
1381
1382   case ir_unop_ssbo_unsized_array_length:
1383   case ir_quadop_vector:
1384      /* This operation should have already been handled.
1385       */
1386      assert(!"Should not get here.");
1387      break;
1388   }
1389
1390   this->result = result_src;
1391}
1392
1393
1394void
1395ir_to_mesa_visitor::visit(ir_swizzle *ir)
1396{
1397   src_reg src;
1398   int i;
1399   int swizzle[4];
1400
1401   /* Note that this is only swizzles in expressions, not those on the left
1402    * hand side of an assignment, which do write masking.  See ir_assignment
1403    * for that.
1404    */
1405
1406   ir->val->accept(this);
1407   src = this->result;
1408   assert(src.file != PROGRAM_UNDEFINED);
1409   assert(ir->type->vector_elements > 0);
1410
1411   for (i = 0; i < 4; i++) {
1412      if (i < ir->type->vector_elements) {
1413	 switch (i) {
1414	 case 0:
1415	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1416	    break;
1417	 case 1:
1418	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1419	    break;
1420	 case 2:
1421	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1422	    break;
1423	 case 3:
1424	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1425	    break;
1426	 }
1427      } else {
1428	 /* If the type is smaller than a vec4, replicate the last
1429	  * channel out.
1430	  */
1431	 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1432      }
1433   }
1434
1435   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1436
1437   this->result = src;
1438}
1439
1440void
1441ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1442{
1443   variable_storage *entry = find_variable_storage(ir->var);
1444   ir_variable *var = ir->var;
1445
1446   if (!entry) {
1447      switch (var->data.mode) {
1448      case ir_var_uniform:
1449	 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1450					       var->data.param_index);
1451	 this->variables.push_tail(entry);
1452	 break;
1453      case ir_var_shader_in:
1454	 /* The linker assigns locations for varyings and attributes,
1455	  * including deprecated builtins (like gl_Color),
1456	  * user-assigned generic attributes (glBindVertexLocation),
1457	  * and user-defined varyings.
1458	  */
1459	 assert(var->data.location != -1);
1460         entry = new(mem_ctx) variable_storage(var,
1461                                               PROGRAM_INPUT,
1462                                               var->data.location);
1463         break;
1464      case ir_var_shader_out:
1465	 assert(var->data.location != -1);
1466         entry = new(mem_ctx) variable_storage(var,
1467                                               PROGRAM_OUTPUT,
1468                                               var->data.location);
1469	 break;
1470      case ir_var_system_value:
1471         entry = new(mem_ctx) variable_storage(var,
1472                                               PROGRAM_SYSTEM_VALUE,
1473                                               var->data.location);
1474         break;
1475      case ir_var_auto:
1476      case ir_var_temporary:
1477	 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1478					       this->next_temp);
1479	 this->variables.push_tail(entry);
1480
1481	 next_temp += type_size(var->type);
1482	 break;
1483      }
1484
1485      if (!entry) {
1486	 printf("Failed to make storage for %s\n", var->name);
1487	 exit(1);
1488      }
1489   }
1490
1491   this->result = src_reg(entry->file, entry->index, var->type);
1492}
1493
1494void
1495ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1496{
1497   ir_constant *index;
1498   src_reg src;
1499   int element_size = type_size(ir->type);
1500
1501   index = ir->array_index->constant_expression_value();
1502
1503   ir->array->accept(this);
1504   src = this->result;
1505
1506   if (index) {
1507      src.index += index->value.i[0] * element_size;
1508   } else {
1509      /* Variable index array dereference.  It eats the "vec4" of the
1510       * base of the array and an index that offsets the Mesa register
1511       * index.
1512       */
1513      ir->array_index->accept(this);
1514
1515      src_reg index_reg;
1516
1517      if (element_size == 1) {
1518	 index_reg = this->result;
1519      } else {
1520	 index_reg = get_temp(glsl_type::float_type);
1521
1522	 emit(ir, OPCODE_MUL, dst_reg(index_reg),
1523	      this->result, src_reg_for_float(element_size));
1524      }
1525
1526      /* If there was already a relative address register involved, add the
1527       * new and the old together to get the new offset.
1528       */
1529      if (src.reladdr != NULL)  {
1530	 src_reg accum_reg = get_temp(glsl_type::float_type);
1531
1532	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1533	      index_reg, *src.reladdr);
1534
1535	 index_reg = accum_reg;
1536      }
1537
1538      src.reladdr = ralloc(mem_ctx, src_reg);
1539      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1540   }
1541
1542   /* If the type is smaller than a vec4, replicate the last channel out. */
1543   if (ir->type->is_scalar() || ir->type->is_vector())
1544      src.swizzle = swizzle_for_size(ir->type->vector_elements);
1545   else
1546      src.swizzle = SWIZZLE_NOOP;
1547
1548   this->result = src;
1549}
1550
1551void
1552ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1553{
1554   unsigned int i;
1555   const glsl_type *struct_type = ir->record->type;
1556   int offset = 0;
1557
1558   ir->record->accept(this);
1559
1560   for (i = 0; i < struct_type->length; i++) {
1561      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1562	 break;
1563      offset += type_size(struct_type->fields.structure[i].type);
1564   }
1565
1566   /* If the type is smaller than a vec4, replicate the last channel out. */
1567   if (ir->type->is_scalar() || ir->type->is_vector())
1568      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1569   else
1570      this->result.swizzle = SWIZZLE_NOOP;
1571
1572   this->result.index += offset;
1573}
1574
1575/**
1576 * We want to be careful in assignment setup to hit the actual storage
1577 * instead of potentially using a temporary like we might with the
1578 * ir_dereference handler.
1579 */
1580static dst_reg
1581get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1582{
1583   /* The LHS must be a dereference.  If the LHS is a variable indexed array
1584    * access of a vector, it must be separated into a series conditional moves
1585    * before reaching this point (see ir_vec_index_to_cond_assign).
1586    */
1587   assert(ir->as_dereference());
1588   ir_dereference_array *deref_array = ir->as_dereference_array();
1589   if (deref_array) {
1590      assert(!deref_array->array->type->is_vector());
1591   }
1592
1593   /* Use the rvalue deref handler for the most part.  We'll ignore
1594    * swizzles in it and write swizzles using writemask, though.
1595    */
1596   ir->accept(v);
1597   return dst_reg(v->result);
1598}
1599
1600/* Calculate the sampler index and also calculate the base uniform location
1601 * for struct members.
1602 */
1603static void
1604calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref,
1605                     unsigned *offset, unsigned *array_elements,
1606                     unsigned *location)
1607{
1608   if (deref->ir_type == ir_type_dereference_variable)
1609      return;
1610
1611   switch (deref->ir_type) {
1612   case ir_type_dereference_array: {
1613      ir_dereference_array *deref_arr = deref->as_dereference_array();
1614      ir_constant *array_index =
1615         deref_arr->array_index->constant_expression_value();
1616
1617      if (!array_index) {
1618	 /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
1619	  * while GLSL 1.30 requires that the array indices be
1620	  * constant integer expressions.  We don't expect any driver
1621	  * to actually work with a really variable array index, so
1622	  * all that would work would be an unrolled loop counter that ends
1623	  * up being constant above.
1624	  */
1625         ralloc_strcat(&prog->data->InfoLog,
1626		       "warning: Variable sampler array index unsupported.\n"
1627		       "This feature of the language was removed in GLSL 1.20 "
1628		       "and is unlikely to be supported for 1.10 in Mesa.\n");
1629      } else {
1630         *offset += array_index->value.u[0] * *array_elements;
1631      }
1632
1633      *array_elements *= deref_arr->array->type->length;
1634
1635      calc_sampler_offsets(prog, deref_arr->array->as_dereference(),
1636                           offset, array_elements, location);
1637      break;
1638   }
1639
1640   case ir_type_dereference_record: {
1641      ir_dereference_record *deref_record = deref->as_dereference_record();
1642      unsigned field_index =
1643         deref_record->record->type->field_index(deref_record->field);
1644      *location +=
1645         deref_record->record->type->record_location_offset(field_index);
1646      calc_sampler_offsets(prog, deref_record->record->as_dereference(),
1647                           offset, array_elements, location);
1648      break;
1649   }
1650
1651   default:
1652      unreachable("Invalid deref type");
1653      break;
1654   }
1655}
1656
1657static int
1658get_sampler_uniform_value(class ir_dereference *sampler,
1659                          struct gl_shader_program *shader_program,
1660                          const struct gl_program *prog)
1661{
1662   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
1663   ir_variable *var = sampler->variable_referenced();
1664   unsigned location = var->data.location;
1665   unsigned array_elements = 1;
1666   unsigned offset = 0;
1667
1668   calc_sampler_offsets(shader_program, sampler, &offset, &array_elements,
1669                        &location);
1670
1671   assert(shader_program->data->UniformStorage[location].opaque[shader].active);
1672   return shader_program->data->UniformStorage[location].opaque[shader].index +
1673          offset;
1674}
1675
1676/**
1677 * Process the condition of a conditional assignment
1678 *
1679 * Examines the condition of a conditional assignment to generate the optimal
1680 * first operand of a \c CMP instruction.  If the condition is a relational
1681 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1682 * used as the source for the \c CMP instruction.  Otherwise the comparison
1683 * is processed to a boolean result, and the boolean result is used as the
1684 * operand to the CMP instruction.
1685 */
1686bool
1687ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1688{
1689   ir_rvalue *src_ir = ir;
1690   bool negate = true;
1691   bool switch_order = false;
1692
1693   ir_expression *const expr = ir->as_expression();
1694   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1695      bool zero_on_left = false;
1696
1697      if (expr->operands[0]->is_zero()) {
1698	 src_ir = expr->operands[1];
1699	 zero_on_left = true;
1700      } else if (expr->operands[1]->is_zero()) {
1701	 src_ir = expr->operands[0];
1702	 zero_on_left = false;
1703      }
1704
1705      /*      a is -  0  +            -  0  +
1706       * (a <  0)  T  F  F  ( a < 0)  T  F  F
1707       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
1708       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1709       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1710       * (a >  0)  F  F  T  (-a < 0)  F  F  T
1711       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
1712       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1713       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1714       *
1715       * Note that exchanging the order of 0 and 'a' in the comparison simply
1716       * means that the value of 'a' should be negated.
1717       */
1718      if (src_ir != ir) {
1719	 switch (expr->operation) {
1720	 case ir_binop_less:
1721	    switch_order = false;
1722	    negate = zero_on_left;
1723	    break;
1724
1725	 case ir_binop_greater:
1726	    switch_order = false;
1727	    negate = !zero_on_left;
1728	    break;
1729
1730	 case ir_binop_lequal:
1731	    switch_order = true;
1732	    negate = !zero_on_left;
1733	    break;
1734
1735	 case ir_binop_gequal:
1736	    switch_order = true;
1737	    negate = zero_on_left;
1738	    break;
1739
1740	 default:
1741	    /* This isn't the right kind of comparison afterall, so make sure
1742	     * the whole condition is visited.
1743	     */
1744	    src_ir = ir;
1745	    break;
1746	 }
1747      }
1748   }
1749
1750   src_ir->accept(this);
1751
1752   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1753    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
1754    * choose which value OPCODE_CMP produces without an extra instruction
1755    * computing the condition.
1756    */
1757   if (negate)
1758      this->result.negate = ~this->result.negate;
1759
1760   return switch_order;
1761}
1762
1763void
1764ir_to_mesa_visitor::visit(ir_assignment *ir)
1765{
1766   dst_reg l;
1767   src_reg r;
1768   int i;
1769
1770   ir->rhs->accept(this);
1771   r = this->result;
1772
1773   l = get_assignment_lhs(ir->lhs, this);
1774
1775   /* FINISHME: This should really set to the correct maximal writemask for each
1776    * FINISHME: component written (in the loops below).  This case can only
1777    * FINISHME: occur for matrices, arrays, and structures.
1778    */
1779   if (ir->write_mask == 0) {
1780      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1781      l.writemask = WRITEMASK_XYZW;
1782   } else if (ir->lhs->type->is_scalar()) {
1783      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1784       * FINISHME: W component of fragment shader output zero, work correctly.
1785       */
1786      l.writemask = WRITEMASK_XYZW;
1787   } else {
1788      int swizzles[4];
1789      int first_enabled_chan = 0;
1790      int rhs_chan = 0;
1791
1792      assert(ir->lhs->type->is_vector());
1793      l.writemask = ir->write_mask;
1794
1795      for (int i = 0; i < 4; i++) {
1796	 if (l.writemask & (1 << i)) {
1797	    first_enabled_chan = GET_SWZ(r.swizzle, i);
1798	    break;
1799	 }
1800      }
1801
1802      /* Swizzle a small RHS vector into the channels being written.
1803       *
1804       * glsl ir treats write_mask as dictating how many channels are
1805       * present on the RHS while Mesa IR treats write_mask as just
1806       * showing which channels of the vec4 RHS get written.
1807       */
1808      for (int i = 0; i < 4; i++) {
1809	 if (l.writemask & (1 << i))
1810	    swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1811	 else
1812	    swizzles[i] = first_enabled_chan;
1813      }
1814      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1815				swizzles[2], swizzles[3]);
1816   }
1817
1818   assert(l.file != PROGRAM_UNDEFINED);
1819   assert(r.file != PROGRAM_UNDEFINED);
1820
1821   if (ir->condition) {
1822      const bool switch_order = this->process_move_condition(ir->condition);
1823      src_reg condition = this->result;
1824
1825      for (i = 0; i < type_size(ir->lhs->type); i++) {
1826	 if (switch_order) {
1827	    emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1828	 } else {
1829	    emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1830	 }
1831
1832	 l.index++;
1833	 r.index++;
1834      }
1835   } else {
1836      for (i = 0; i < type_size(ir->lhs->type); i++) {
1837	 emit(ir, OPCODE_MOV, l, r);
1838	 l.index++;
1839	 r.index++;
1840      }
1841   }
1842}
1843
1844
1845void
1846ir_to_mesa_visitor::visit(ir_constant *ir)
1847{
1848   src_reg src;
1849   GLfloat stack_vals[4] = { 0 };
1850   GLfloat *values = stack_vals;
1851   unsigned int i;
1852
1853   /* Unfortunately, 4 floats is all we can get into
1854    * _mesa_add_unnamed_constant.  So, make a temp to store an
1855    * aggregate constant and move each constant value into it.  If we
1856    * get lucky, copy propagation will eliminate the extra moves.
1857    */
1858
1859   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1860      src_reg temp_base = get_temp(ir->type);
1861      dst_reg temp = dst_reg(temp_base);
1862
1863      foreach_in_list(ir_constant, field_value, &ir->components) {
1864	 int size = type_size(field_value->type);
1865
1866	 assert(size > 0);
1867
1868	 field_value->accept(this);
1869	 src = this->result;
1870
1871	 for (i = 0; i < (unsigned int)size; i++) {
1872	    emit(ir, OPCODE_MOV, temp, src);
1873
1874	    src.index++;
1875	    temp.index++;
1876	 }
1877      }
1878      this->result = temp_base;
1879      return;
1880   }
1881
1882   if (ir->type->is_array()) {
1883      src_reg temp_base = get_temp(ir->type);
1884      dst_reg temp = dst_reg(temp_base);
1885      int size = type_size(ir->type->fields.array);
1886
1887      assert(size > 0);
1888
1889      for (i = 0; i < ir->type->length; i++) {
1890	 ir->array_elements[i]->accept(this);
1891	 src = this->result;
1892	 for (int j = 0; j < size; j++) {
1893	    emit(ir, OPCODE_MOV, temp, src);
1894
1895	    src.index++;
1896	    temp.index++;
1897	 }
1898      }
1899      this->result = temp_base;
1900      return;
1901   }
1902
1903   if (ir->type->is_matrix()) {
1904      src_reg mat = get_temp(ir->type);
1905      dst_reg mat_column = dst_reg(mat);
1906
1907      for (i = 0; i < ir->type->matrix_columns; i++) {
1908	 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1909	 values = &ir->value.f[i * ir->type->vector_elements];
1910
1911	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1912	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1913						(gl_constant_value *) values,
1914						ir->type->vector_elements,
1915						&src.swizzle);
1916	 emit(ir, OPCODE_MOV, mat_column, src);
1917
1918	 mat_column.index++;
1919      }
1920
1921      this->result = mat;
1922      return;
1923   }
1924
1925   src.file = PROGRAM_CONSTANT;
1926   switch (ir->type->base_type) {
1927   case GLSL_TYPE_FLOAT:
1928      values = &ir->value.f[0];
1929      break;
1930   case GLSL_TYPE_UINT:
1931      for (i = 0; i < ir->type->vector_elements; i++) {
1932	 values[i] = ir->value.u[i];
1933      }
1934      break;
1935   case GLSL_TYPE_INT:
1936      for (i = 0; i < ir->type->vector_elements; i++) {
1937	 values[i] = ir->value.i[i];
1938      }
1939      break;
1940   case GLSL_TYPE_BOOL:
1941      for (i = 0; i < ir->type->vector_elements; i++) {
1942	 values[i] = ir->value.b[i];
1943      }
1944      break;
1945   default:
1946      assert(!"Non-float/uint/int/bool constant");
1947   }
1948
1949   this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1950   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1951						   (gl_constant_value *) values,
1952						   ir->type->vector_elements,
1953						   &this->result.swizzle);
1954}
1955
1956void
1957ir_to_mesa_visitor::visit(ir_call *)
1958{
1959   assert(!"ir_to_mesa: All function calls should have been inlined by now.");
1960}
1961
1962void
1963ir_to_mesa_visitor::visit(ir_texture *ir)
1964{
1965   src_reg result_src, coord, lod_info, projector, dx, dy;
1966   dst_reg result_dst, coord_dst;
1967   ir_to_mesa_instruction *inst = NULL;
1968   prog_opcode opcode = OPCODE_NOP;
1969
1970   if (ir->op == ir_txs)
1971      this->result = src_reg_for_float(0.0);
1972   else
1973      ir->coordinate->accept(this);
1974
1975   /* Put our coords in a temp.  We'll need to modify them for shadow,
1976    * projection, or LOD, so the only case we'd use it as-is is if
1977    * we're doing plain old texturing.  Mesa IR optimization should
1978    * handle cleaning up our mess in that case.
1979    */
1980   coord = get_temp(glsl_type::vec4_type);
1981   coord_dst = dst_reg(coord);
1982   emit(ir, OPCODE_MOV, coord_dst, this->result);
1983
1984   if (ir->projector) {
1985      ir->projector->accept(this);
1986      projector = this->result;
1987   }
1988
1989   /* Storage for our result.  Ideally for an assignment we'd be using
1990    * the actual storage for the result here, instead.
1991    */
1992   result_src = get_temp(glsl_type::vec4_type);
1993   result_dst = dst_reg(result_src);
1994
1995   switch (ir->op) {
1996   case ir_tex:
1997   case ir_txs:
1998      opcode = OPCODE_TEX;
1999      break;
2000   case ir_txb:
2001      opcode = OPCODE_TXB;
2002      ir->lod_info.bias->accept(this);
2003      lod_info = this->result;
2004      break;
2005   case ir_txf:
2006      /* Pretend to be TXL so the sampler, coordinate, lod are available */
2007   case ir_txl:
2008      opcode = OPCODE_TXL;
2009      ir->lod_info.lod->accept(this);
2010      lod_info = this->result;
2011      break;
2012   case ir_txd:
2013      opcode = OPCODE_TXD;
2014      ir->lod_info.grad.dPdx->accept(this);
2015      dx = this->result;
2016      ir->lod_info.grad.dPdy->accept(this);
2017      dy = this->result;
2018      break;
2019   case ir_txf_ms:
2020      assert(!"Unexpected ir_txf_ms opcode");
2021      break;
2022   case ir_lod:
2023      assert(!"Unexpected ir_lod opcode");
2024      break;
2025   case ir_tg4:
2026      assert(!"Unexpected ir_tg4 opcode");
2027      break;
2028   case ir_query_levels:
2029      assert(!"Unexpected ir_query_levels opcode");
2030      break;
2031   case ir_samples_identical:
2032      unreachable("Unexpected ir_samples_identical opcode");
2033   case ir_texture_samples:
2034      unreachable("Unexpected ir_texture_samples opcode");
2035   }
2036
2037   const glsl_type *sampler_type = ir->sampler->type;
2038
2039   if (ir->projector) {
2040      if (opcode == OPCODE_TEX) {
2041	 /* Slot the projector in as the last component of the coord. */
2042	 coord_dst.writemask = WRITEMASK_W;
2043	 emit(ir, OPCODE_MOV, coord_dst, projector);
2044	 coord_dst.writemask = WRITEMASK_XYZW;
2045	 opcode = OPCODE_TXP;
2046      } else {
2047	 src_reg coord_w = coord;
2048	 coord_w.swizzle = SWIZZLE_WWWW;
2049
2050	 /* For the other TEX opcodes there's no projective version
2051	  * since the last slot is taken up by lod info.  Do the
2052	  * projective divide now.
2053	  */
2054	 coord_dst.writemask = WRITEMASK_W;
2055	 emit(ir, OPCODE_RCP, coord_dst, projector);
2056
2057	 /* In the case where we have to project the coordinates "by hand,"
2058	  * the shadow comparator value must also be projected.
2059	  */
2060	 src_reg tmp_src = coord;
2061	 if (ir->shadow_comparator) {
2062	    /* Slot the shadow value in as the second to last component of the
2063	     * coord.
2064	     */
2065	    ir->shadow_comparator->accept(this);
2066
2067	    tmp_src = get_temp(glsl_type::vec4_type);
2068	    dst_reg tmp_dst = dst_reg(tmp_src);
2069
2070	    /* Projective division not allowed for array samplers. */
2071	    assert(!sampler_type->sampler_array);
2072
2073	    tmp_dst.writemask = WRITEMASK_Z;
2074	    emit(ir, OPCODE_MOV, tmp_dst, this->result);
2075
2076	    tmp_dst.writemask = WRITEMASK_XY;
2077	    emit(ir, OPCODE_MOV, tmp_dst, coord);
2078	 }
2079
2080	 coord_dst.writemask = WRITEMASK_XYZ;
2081	 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
2082
2083	 coord_dst.writemask = WRITEMASK_XYZW;
2084	 coord.swizzle = SWIZZLE_XYZW;
2085      }
2086   }
2087
2088   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
2089    * comparator was put in the correct place (and projected) by the code,
2090    * above, that handles by-hand projection.
2091    */
2092   if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) {
2093      /* Slot the shadow value in as the second to last component of the
2094       * coord.
2095       */
2096      ir->shadow_comparator->accept(this);
2097
2098      /* XXX This will need to be updated for cubemap array samplers. */
2099      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2100          sampler_type->sampler_array) {
2101         coord_dst.writemask = WRITEMASK_W;
2102      } else {
2103         coord_dst.writemask = WRITEMASK_Z;
2104      }
2105
2106      emit(ir, OPCODE_MOV, coord_dst, this->result);
2107      coord_dst.writemask = WRITEMASK_XYZW;
2108   }
2109
2110   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2111      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2112      coord_dst.writemask = WRITEMASK_W;
2113      emit(ir, OPCODE_MOV, coord_dst, lod_info);
2114      coord_dst.writemask = WRITEMASK_XYZW;
2115   }
2116
2117   if (opcode == OPCODE_TXD)
2118      inst = emit(ir, opcode, result_dst, coord, dx, dy);
2119   else
2120      inst = emit(ir, opcode, result_dst, coord);
2121
2122   if (ir->shadow_comparator)
2123      inst->tex_shadow = GL_TRUE;
2124
2125   inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
2126                                             prog);
2127
2128   switch (sampler_type->sampler_dimensionality) {
2129   case GLSL_SAMPLER_DIM_1D:
2130      inst->tex_target = (sampler_type->sampler_array)
2131	 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2132      break;
2133   case GLSL_SAMPLER_DIM_2D:
2134      inst->tex_target = (sampler_type->sampler_array)
2135	 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2136      break;
2137   case GLSL_SAMPLER_DIM_3D:
2138      inst->tex_target = TEXTURE_3D_INDEX;
2139      break;
2140   case GLSL_SAMPLER_DIM_CUBE:
2141      inst->tex_target = TEXTURE_CUBE_INDEX;
2142      break;
2143   case GLSL_SAMPLER_DIM_RECT:
2144      inst->tex_target = TEXTURE_RECT_INDEX;
2145      break;
2146   case GLSL_SAMPLER_DIM_BUF:
2147      assert(!"FINISHME: Implement ARB_texture_buffer_object");
2148      break;
2149   case GLSL_SAMPLER_DIM_EXTERNAL:
2150      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
2151      break;
2152   default:
2153      assert(!"Should not get here.");
2154   }
2155
2156   this->result = result_src;
2157}
2158
2159void
2160ir_to_mesa_visitor::visit(ir_return *ir)
2161{
2162   /* Non-void functions should have been inlined.  We may still emit RETs
2163    * from main() unless the EmitNoMainReturn option is set.
2164    */
2165   assert(!ir->get_value());
2166   emit(ir, OPCODE_RET);
2167}
2168
2169void
2170ir_to_mesa_visitor::visit(ir_discard *ir)
2171{
2172   if (!ir->condition)
2173      ir->condition = new(mem_ctx) ir_constant(true);
2174
2175   ir->condition->accept(this);
2176   this->result.negate = ~this->result.negate;
2177   emit(ir, OPCODE_KIL, undef_dst, this->result);
2178}
2179
2180void
2181ir_to_mesa_visitor::visit(ir_if *ir)
2182{
2183   ir_to_mesa_instruction *if_inst;
2184
2185   ir->condition->accept(this);
2186   assert(this->result.file != PROGRAM_UNDEFINED);
2187
2188   if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2189
2190   this->instructions.push_tail(if_inst);
2191
2192   visit_exec_list(&ir->then_instructions, this);
2193
2194   if (!ir->else_instructions.is_empty()) {
2195      emit(ir->condition, OPCODE_ELSE);
2196      visit_exec_list(&ir->else_instructions, this);
2197   }
2198
2199   emit(ir->condition, OPCODE_ENDIF);
2200}
2201
2202void
2203ir_to_mesa_visitor::visit(ir_emit_vertex *)
2204{
2205   assert(!"Geometry shaders not supported.");
2206}
2207
2208void
2209ir_to_mesa_visitor::visit(ir_end_primitive *)
2210{
2211   assert(!"Geometry shaders not supported.");
2212}
2213
2214void
2215ir_to_mesa_visitor::visit(ir_barrier *)
2216{
2217   unreachable("GLSL barrier() not supported.");
2218}
2219
2220ir_to_mesa_visitor::ir_to_mesa_visitor()
2221{
2222   result.file = PROGRAM_UNDEFINED;
2223   next_temp = 1;
2224   next_signature_id = 1;
2225   current_function = NULL;
2226   mem_ctx = ralloc_context(NULL);
2227}
2228
2229ir_to_mesa_visitor::~ir_to_mesa_visitor()
2230{
2231   ralloc_free(mem_ctx);
2232}
2233
2234static struct prog_src_register
2235mesa_src_reg_from_ir_src_reg(src_reg reg)
2236{
2237   struct prog_src_register mesa_reg;
2238
2239   mesa_reg.File = reg.file;
2240   assert(reg.index < (1 << INST_INDEX_BITS));
2241   mesa_reg.Index = reg.index;
2242   mesa_reg.Swizzle = reg.swizzle;
2243   mesa_reg.RelAddr = reg.reladdr != NULL;
2244   mesa_reg.Negate = reg.negate;
2245
2246   return mesa_reg;
2247}
2248
2249static void
2250set_branchtargets(ir_to_mesa_visitor *v,
2251		  struct prog_instruction *mesa_instructions,
2252		  int num_instructions)
2253{
2254   int if_count = 0, loop_count = 0;
2255   int *if_stack, *loop_stack;
2256   int if_stack_pos = 0, loop_stack_pos = 0;
2257   int i, j;
2258
2259   for (i = 0; i < num_instructions; i++) {
2260      switch (mesa_instructions[i].Opcode) {
2261      case OPCODE_IF:
2262	 if_count++;
2263	 break;
2264      case OPCODE_BGNLOOP:
2265	 loop_count++;
2266	 break;
2267      case OPCODE_BRK:
2268      case OPCODE_CONT:
2269	 mesa_instructions[i].BranchTarget = -1;
2270	 break;
2271      default:
2272	 break;
2273      }
2274   }
2275
2276   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
2277   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
2278
2279   for (i = 0; i < num_instructions; i++) {
2280      switch (mesa_instructions[i].Opcode) {
2281      case OPCODE_IF:
2282	 if_stack[if_stack_pos] = i;
2283	 if_stack_pos++;
2284	 break;
2285      case OPCODE_ELSE:
2286	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2287	 if_stack[if_stack_pos - 1] = i;
2288	 break;
2289      case OPCODE_ENDIF:
2290	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2291	 if_stack_pos--;
2292	 break;
2293      case OPCODE_BGNLOOP:
2294	 loop_stack[loop_stack_pos] = i;
2295	 loop_stack_pos++;
2296	 break;
2297      case OPCODE_ENDLOOP:
2298	 loop_stack_pos--;
2299	 /* Rewrite any breaks/conts at this nesting level (haven't
2300	  * already had a BranchTarget assigned) to point to the end
2301	  * of the loop.
2302	  */
2303	 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2304	    if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2305		mesa_instructions[j].Opcode == OPCODE_CONT) {
2306	       if (mesa_instructions[j].BranchTarget == -1) {
2307		  mesa_instructions[j].BranchTarget = i;
2308	       }
2309	    }
2310	 }
2311	 /* The loop ends point at each other. */
2312	 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2313	 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2314	 break;
2315      case OPCODE_CAL:
2316	 foreach_in_list(function_entry, entry, &v->function_signatures) {
2317	    if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2318	       mesa_instructions[i].BranchTarget = entry->inst;
2319	       break;
2320	    }
2321	 }
2322	 break;
2323      default:
2324	 break;
2325      }
2326   }
2327}
2328
2329static void
2330print_program(struct prog_instruction *mesa_instructions,
2331	      ir_instruction **mesa_instruction_annotation,
2332	      int num_instructions)
2333{
2334   ir_instruction *last_ir = NULL;
2335   int i;
2336   int indent = 0;
2337
2338   for (i = 0; i < num_instructions; i++) {
2339      struct prog_instruction *mesa_inst = mesa_instructions + i;
2340      ir_instruction *ir = mesa_instruction_annotation[i];
2341
2342      fprintf(stdout, "%3d: ", i);
2343
2344      if (last_ir != ir && ir) {
2345	 int j;
2346
2347	 for (j = 0; j < indent; j++) {
2348	    fprintf(stdout, " ");
2349	 }
2350	 ir->print();
2351	 printf("\n");
2352	 last_ir = ir;
2353
2354	 fprintf(stdout, "     "); /* line number spacing. */
2355      }
2356
2357      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2358					    PROG_PRINT_DEBUG, NULL);
2359   }
2360}
2361
2362namespace {
2363
2364class add_uniform_to_shader : public program_resource_visitor {
2365public:
2366   add_uniform_to_shader(struct gl_shader_program *shader_program,
2367			 struct gl_program_parameter_list *params,
2368                         gl_shader_stage shader_type)
2369      : shader_program(shader_program), params(params), idx(-1),
2370        shader_type(shader_type)
2371   {
2372      /* empty */
2373   }
2374
2375   void process(ir_variable *var)
2376   {
2377      this->idx = -1;
2378      this->program_resource_visitor::process(var);
2379      var->data.param_index = this->idx;
2380   }
2381
2382private:
2383   virtual void visit_field(const glsl_type *type, const char *name,
2384                            bool row_major, const glsl_type *record_type,
2385                            const enum glsl_interface_packing packing,
2386                            bool last_field);
2387
2388   struct gl_shader_program *shader_program;
2389   struct gl_program_parameter_list *params;
2390   int idx;
2391   gl_shader_stage shader_type;
2392};
2393
2394} /* anonymous namespace */
2395
2396void
2397add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
2398                                   bool /* row_major */,
2399                                   const glsl_type * /* record_type */,
2400                                   const enum glsl_interface_packing,
2401                                   bool /* last_field */)
2402{
2403   unsigned int size;
2404
2405   /* atomics don't get real storage */
2406   if (type->contains_atomic())
2407      return;
2408
2409   if (type->is_vector() || type->is_scalar()) {
2410      size = type->vector_elements;
2411      if (type->is_64bit())
2412         size *= 2;
2413   } else {
2414      size = type_size(type) * 4;
2415   }
2416
2417   gl_register_file file;
2418   if (type->without_array()->is_sampler()) {
2419      file = PROGRAM_SAMPLER;
2420   } else {
2421      file = PROGRAM_UNIFORM;
2422   }
2423
2424   int index = _mesa_lookup_parameter_index(params, name);
2425   if (index < 0) {
2426      index = _mesa_add_parameter(params, file, name, size, type->gl_type,
2427				  NULL, NULL);
2428
2429      /* Sampler uniform values are stored in prog->SamplerUnits,
2430       * and the entry in that array is selected by this index we
2431       * store in ParameterValues[].
2432       */
2433      if (file == PROGRAM_SAMPLER) {
2434	 unsigned location;
2435	 const bool found =
2436	    this->shader_program->UniformHash->get(location,
2437						   params->Parameters[index].Name);
2438	 assert(found);
2439
2440	 if (!found)
2441	    return;
2442
2443	 struct gl_uniform_storage *storage =
2444            &this->shader_program->data->UniformStorage[location];
2445
2446         assert(storage->type->is_sampler() &&
2447                storage->opaque[shader_type].active);
2448
2449	 for (unsigned int j = 0; j < size / 4; j++)
2450            params->ParameterValues[index + j][0].f =
2451               storage->opaque[shader_type].index + j;
2452      }
2453   }
2454
2455   /* The first part of the uniform that's processed determines the base
2456    * location of the whole uniform (for structures).
2457    */
2458   if (this->idx < 0)
2459      this->idx = index;
2460}
2461
2462/**
2463 * Generate the program parameters list for the user uniforms in a shader
2464 *
2465 * \param shader_program Linked shader program.  This is only used to
2466 *                       emit possible link errors to the info log.
2467 * \param sh             Shader whose uniforms are to be processed.
2468 * \param params         Parameter list to be filled in.
2469 */
2470void
2471_mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
2472					    *shader_program,
2473					    struct gl_linked_shader *sh,
2474					    struct gl_program_parameter_list
2475					    *params)
2476{
2477   add_uniform_to_shader add(shader_program, params, sh->Stage);
2478
2479   foreach_in_list(ir_instruction, node, sh->ir) {
2480      ir_variable *var = node->as_variable();
2481
2482      if ((var == NULL) || (var->data.mode != ir_var_uniform)
2483	  || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0))
2484	 continue;
2485
2486      add.process(var);
2487   }
2488}
2489
2490void
2491_mesa_associate_uniform_storage(struct gl_context *ctx,
2492				struct gl_shader_program *shader_program,
2493				struct gl_program_parameter_list *params)
2494{
2495   /* After adding each uniform to the parameter list, connect the storage for
2496    * the parameter with the tracking structure used by the API for the
2497    * uniform.
2498    */
2499   unsigned last_location = unsigned(~0);
2500   for (unsigned i = 0; i < params->NumParameters; i++) {
2501      if (params->Parameters[i].Type != PROGRAM_UNIFORM)
2502	 continue;
2503
2504      unsigned location;
2505      const bool found =
2506	 shader_program->UniformHash->get(location, params->Parameters[i].Name);
2507      assert(found);
2508
2509      if (!found)
2510	 continue;
2511
2512      struct gl_uniform_storage *storage =
2513         &shader_program->data->UniformStorage[location];
2514
2515      /* Do not associate any uniform storage to built-in uniforms */
2516      if (storage->builtin)
2517         continue;
2518
2519      if (location != last_location) {
2520	 enum gl_uniform_driver_format format = uniform_native;
2521
2522	 unsigned columns = 0;
2523	 int dmul = 4 * sizeof(float);
2524	 switch (storage->type->base_type) {
2525	 case GLSL_TYPE_UINT:
2526	    assert(ctx->Const.NativeIntegers);
2527	    format = uniform_native;
2528	    columns = 1;
2529	    break;
2530	 case GLSL_TYPE_INT:
2531	    format =
2532	       (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
2533	    columns = 1;
2534	    break;
2535
2536	 case GLSL_TYPE_DOUBLE:
2537	    if (storage->type->vector_elements > 2)
2538               dmul *= 2;
2539	    /* fallthrough */
2540	 case GLSL_TYPE_FLOAT:
2541	    format = uniform_native;
2542	    columns = storage->type->matrix_columns;
2543	    break;
2544	 case GLSL_TYPE_BOOL:
2545	    format = uniform_native;
2546	    columns = 1;
2547	    break;
2548	 case GLSL_TYPE_SAMPLER:
2549	 case GLSL_TYPE_IMAGE:
2550         case GLSL_TYPE_SUBROUTINE:
2551	    format = uniform_native;
2552	    columns = 1;
2553	    break;
2554         case GLSL_TYPE_ATOMIC_UINT:
2555         case GLSL_TYPE_ARRAY:
2556         case GLSL_TYPE_VOID:
2557         case GLSL_TYPE_STRUCT:
2558         case GLSL_TYPE_ERROR:
2559         case GLSL_TYPE_INTERFACE:
2560         case GLSL_TYPE_FUNCTION:
2561	    assert(!"Should not get here.");
2562	    break;
2563	 }
2564
2565	 _mesa_uniform_attach_driver_storage(storage,
2566					     dmul * columns,
2567					     dmul,
2568					     format,
2569					     &params->ParameterValues[i]);
2570
2571	 /* After attaching the driver's storage to the uniform, propagate any
2572	  * data from the linker's backing store.  This will cause values from
2573	  * initializers in the source code to be copied over.
2574	  */
2575	 _mesa_propagate_uniforms_to_driver_storage(storage,
2576						    0,
2577						    MAX2(1, storage->array_elements));
2578
2579	 last_location = location;
2580      }
2581   }
2582}
2583
2584/*
2585 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2586 * channels for copy propagation and updates following instructions to
2587 * use the original versions.
2588 *
2589 * The ir_to_mesa_visitor lazily produces code assuming that this pass
2590 * will occur.  As an example, a TXP production before this pass:
2591 *
2592 * 0: MOV TEMP[1], INPUT[4].xyyy;
2593 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2594 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2595 *
2596 * and after:
2597 *
2598 * 0: MOV TEMP[1], INPUT[4].xyyy;
2599 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2600 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2601 *
2602 * which allows for dead code elimination on TEMP[1]'s writes.
2603 */
2604void
2605ir_to_mesa_visitor::copy_propagate(void)
2606{
2607   ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
2608						    ir_to_mesa_instruction *,
2609						    this->next_temp * 4);
2610   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2611   int level = 0;
2612
2613   foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
2614      assert(inst->dst.file != PROGRAM_TEMPORARY
2615	     || inst->dst.index < this->next_temp);
2616
2617      /* First, do any copy propagation possible into the src regs. */
2618      for (int r = 0; r < 3; r++) {
2619	 ir_to_mesa_instruction *first = NULL;
2620	 bool good = true;
2621	 int acp_base = inst->src[r].index * 4;
2622
2623	 if (inst->src[r].file != PROGRAM_TEMPORARY ||
2624	     inst->src[r].reladdr)
2625	    continue;
2626
2627	 /* See if we can find entries in the ACP consisting of MOVs
2628	  * from the same src register for all the swizzled channels
2629	  * of this src register reference.
2630	  */
2631	 for (int i = 0; i < 4; i++) {
2632	    int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2633	    ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
2634
2635	    if (!copy_chan) {
2636	       good = false;
2637	       break;
2638	    }
2639
2640	    assert(acp_level[acp_base + src_chan] <= level);
2641
2642	    if (!first) {
2643	       first = copy_chan;
2644	    } else {
2645	       if (first->src[0].file != copy_chan->src[0].file ||
2646		   first->src[0].index != copy_chan->src[0].index) {
2647		  good = false;
2648		  break;
2649	       }
2650	    }
2651	 }
2652
2653	 if (good) {
2654	    /* We've now validated that we can copy-propagate to
2655	     * replace this src register reference.  Do it.
2656	     */
2657	    inst->src[r].file = first->src[0].file;
2658	    inst->src[r].index = first->src[0].index;
2659
2660	    int swizzle = 0;
2661	    for (int i = 0; i < 4; i++) {
2662	       int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2663	       ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
2664	       swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
2665			   (3 * i));
2666	    }
2667	    inst->src[r].swizzle = swizzle;
2668	 }
2669      }
2670
2671      switch (inst->op) {
2672      case OPCODE_BGNLOOP:
2673      case OPCODE_ENDLOOP:
2674	 /* End of a basic block, clear the ACP entirely. */
2675	 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2676	 break;
2677
2678      case OPCODE_IF:
2679	 ++level;
2680	 break;
2681
2682      case OPCODE_ENDIF:
2683      case OPCODE_ELSE:
2684	 /* Clear all channels written inside the block from the ACP, but
2685	  * leaving those that were not touched.
2686	  */
2687	 for (int r = 0; r < this->next_temp; r++) {
2688	    for (int c = 0; c < 4; c++) {
2689	       if (!acp[4 * r + c])
2690		  continue;
2691
2692	       if (acp_level[4 * r + c] >= level)
2693		  acp[4 * r + c] = NULL;
2694	    }
2695	 }
2696	 if (inst->op == OPCODE_ENDIF)
2697	    --level;
2698	 break;
2699
2700      default:
2701	 /* Continuing the block, clear any written channels from
2702	  * the ACP.
2703	  */
2704	 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
2705	    /* Any temporary might be written, so no copy propagation
2706	     * across this instruction.
2707	     */
2708	    memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2709	 } else if (inst->dst.file == PROGRAM_OUTPUT &&
2710		    inst->dst.reladdr) {
2711	    /* Any output might be written, so no copy propagation
2712	     * from outputs across this instruction.
2713	     */
2714	    for (int r = 0; r < this->next_temp; r++) {
2715	       for (int c = 0; c < 4; c++) {
2716		  if (!acp[4 * r + c])
2717		     continue;
2718
2719		  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
2720		     acp[4 * r + c] = NULL;
2721	       }
2722	    }
2723	 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
2724		    inst->dst.file == PROGRAM_OUTPUT) {
2725	    /* Clear where it's used as dst. */
2726	    if (inst->dst.file == PROGRAM_TEMPORARY) {
2727	       for (int c = 0; c < 4; c++) {
2728		  if (inst->dst.writemask & (1 << c)) {
2729		     acp[4 * inst->dst.index + c] = NULL;
2730		  }
2731	       }
2732	    }
2733
2734	    /* Clear where it's used as src. */
2735	    for (int r = 0; r < this->next_temp; r++) {
2736	       for (int c = 0; c < 4; c++) {
2737		  if (!acp[4 * r + c])
2738		     continue;
2739
2740		  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
2741
2742		  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
2743		      acp[4 * r + c]->src[0].index == inst->dst.index &&
2744		      inst->dst.writemask & (1 << src_chan))
2745		  {
2746		     acp[4 * r + c] = NULL;
2747		  }
2748	       }
2749	    }
2750	 }
2751	 break;
2752      }
2753
2754      /* If this is a copy, add it to the ACP. */
2755      if (inst->op == OPCODE_MOV &&
2756	  inst->dst.file == PROGRAM_TEMPORARY &&
2757	  !(inst->dst.file == inst->src[0].file &&
2758	    inst->dst.index == inst->src[0].index) &&
2759	  !inst->dst.reladdr &&
2760	  !inst->saturate &&
2761	  !inst->src[0].reladdr &&
2762	  !inst->src[0].negate) {
2763	 for (int i = 0; i < 4; i++) {
2764	    if (inst->dst.writemask & (1 << i)) {
2765	       acp[4 * inst->dst.index + i] = inst;
2766	       acp_level[4 * inst->dst.index + i] = level;
2767	    }
2768	 }
2769      }
2770   }
2771
2772   ralloc_free(acp_level);
2773   ralloc_free(acp);
2774}
2775
2776
2777/**
2778 * Convert a shader's GLSL IR into a Mesa gl_program.
2779 */
2780static struct gl_program *
2781get_mesa_program(struct gl_context *ctx,
2782                 struct gl_shader_program *shader_program,
2783		 struct gl_linked_shader *shader)
2784{
2785   ir_to_mesa_visitor v;
2786   struct prog_instruction *mesa_instructions, *mesa_inst;
2787   ir_instruction **mesa_instruction_annotation;
2788   int i;
2789   struct gl_program *prog;
2790   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
2791   const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
2792   struct gl_shader_compiler_options *options =
2793         &ctx->Const.ShaderCompilerOptions[shader->Stage];
2794
2795   validate_ir_tree(shader->ir);
2796
2797   prog = shader->Program;
2798   prog->Parameters = _mesa_new_parameter_list();
2799   v.ctx = ctx;
2800   v.prog = prog;
2801   v.shader_program = shader_program;
2802   v.options = options;
2803
2804   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
2805					       prog->Parameters);
2806
2807   /* Emit Mesa IR for main(). */
2808   visit_exec_list(shader->ir, &v);
2809   v.emit(NULL, OPCODE_END);
2810
2811   prog->arb.NumTemporaries = v.next_temp;
2812
2813   unsigned num_instructions = v.instructions.length();
2814
2815   mesa_instructions = rzalloc_array(prog, struct prog_instruction,
2816                                     num_instructions);
2817   mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
2818					      num_instructions);
2819
2820   v.copy_propagate();
2821
2822   /* Convert ir_mesa_instructions into prog_instructions.
2823    */
2824   mesa_inst = mesa_instructions;
2825   i = 0;
2826   foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
2827      mesa_inst->Opcode = inst->op;
2828      if (inst->saturate)
2829	 mesa_inst->Saturate = GL_TRUE;
2830      mesa_inst->DstReg.File = inst->dst.file;
2831      mesa_inst->DstReg.Index = inst->dst.index;
2832      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
2833      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
2834      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
2835      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
2836      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
2837      mesa_inst->TexSrcUnit = inst->sampler;
2838      mesa_inst->TexSrcTarget = inst->tex_target;
2839      mesa_inst->TexShadow = inst->tex_shadow;
2840      mesa_instruction_annotation[i] = inst->ir;
2841
2842      /* Set IndirectRegisterFiles. */
2843      if (mesa_inst->DstReg.RelAddr)
2844         prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
2845
2846      /* Update program's bitmask of indirectly accessed register files */
2847      for (unsigned src = 0; src < 3; src++)
2848         if (mesa_inst->SrcReg[src].RelAddr)
2849            prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
2850
2851      switch (mesa_inst->Opcode) {
2852      case OPCODE_IF:
2853	 if (options->MaxIfDepth == 0) {
2854	    linker_warning(shader_program,
2855			   "Couldn't flatten if-statement.  "
2856			   "This will likely result in software "
2857			   "rasterization.\n");
2858	 }
2859	 break;
2860      case OPCODE_BGNLOOP:
2861	 if (options->EmitNoLoops) {
2862	    linker_warning(shader_program,
2863			   "Couldn't unroll loop.  "
2864			   "This will likely result in software "
2865			   "rasterization.\n");
2866	 }
2867	 break;
2868      case OPCODE_CONT:
2869	 if (options->EmitNoCont) {
2870	    linker_warning(shader_program,
2871			   "Couldn't lower continue-statement.  "
2872			   "This will likely result in software "
2873			   "rasterization.\n");
2874	 }
2875	 break;
2876      case OPCODE_ARL:
2877         prog->arb.NumAddressRegs = 1;
2878	 break;
2879      default:
2880	 break;
2881      }
2882
2883      mesa_inst++;
2884      i++;
2885
2886      if (!shader_program->data->LinkStatus)
2887         break;
2888   }
2889
2890   if (!shader_program->data->LinkStatus) {
2891      goto fail_exit;
2892   }
2893
2894   set_branchtargets(&v, mesa_instructions, num_instructions);
2895
2896   if (ctx->_Shader->Flags & GLSL_DUMP) {
2897      fprintf(stderr, "\n");
2898      fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
2899	      shader_program->Name);
2900      _mesa_print_ir(stderr, shader->ir, NULL);
2901      fprintf(stderr, "\n");
2902      fprintf(stderr, "\n");
2903      fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
2904	      shader_program->Name);
2905      print_program(mesa_instructions, mesa_instruction_annotation,
2906		    num_instructions);
2907      fflush(stderr);
2908   }
2909
2910   prog->arb.Instructions = mesa_instructions;
2911   prog->arb.NumInstructions = num_instructions;
2912
2913   /* Setting this to NULL prevents a possible double free in the fail_exit
2914    * path (far below).
2915    */
2916   mesa_instructions = NULL;
2917
2918   do_set_program_inouts(shader->ir, prog, shader->Stage);
2919
2920   prog->ShadowSamplers = shader->shadow_samplers;
2921   prog->ExternalSamplersUsed = gl_external_samplers(prog);
2922   _mesa_update_shader_textures_used(shader_program, prog);
2923
2924   /* Set the gl_FragDepth layout. */
2925   if (target == GL_FRAGMENT_PROGRAM_ARB) {
2926      prog->info.fs.depth_layout = shader_program->FragDepthLayout;
2927   }
2928
2929   if ((ctx->_Shader->Flags & GLSL_NO_OPT) == 0) {
2930      _mesa_optimize_program(ctx, prog, prog);
2931   }
2932
2933   /* This has to be done last.  Any operation that can cause
2934    * prog->ParameterValues to get reallocated (e.g., anything that adds a
2935    * program constant) has to happen before creating this linkage.
2936    */
2937   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
2938   if (!shader_program->data->LinkStatus) {
2939      goto fail_exit;
2940   }
2941
2942   return prog;
2943
2944fail_exit:
2945   ralloc_free(mesa_instructions);
2946   _mesa_reference_program(ctx, &shader->Program, NULL);
2947   return NULL;
2948}
2949
2950extern "C" {
2951
2952/**
2953 * Link a shader.
2954 * Called via ctx->Driver.LinkShader()
2955 * This actually involves converting GLSL IR into Mesa gl_programs with
2956 * code lowering and other optimizations.
2957 */
2958GLboolean
2959_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
2960{
2961   assert(prog->data->LinkStatus);
2962
2963   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
2964      if (prog->_LinkedShaders[i] == NULL)
2965	 continue;
2966
2967      bool progress;
2968      exec_list *ir = prog->_LinkedShaders[i]->ir;
2969      const struct gl_shader_compiler_options *options =
2970            &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage];
2971
2972      do {
2973	 progress = false;
2974
2975	 /* Lowering */
2976	 do_mat_op_to_vec(ir);
2977	 lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
2978				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
2979				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
2980
2981	 progress = do_common_optimization(ir, true, true,
2982                                           options, ctx->Const.NativeIntegers)
2983	   || progress;
2984
2985	 progress = lower_quadop_vector(ir, true) || progress;
2986
2987	 if (options->MaxIfDepth == 0)
2988	    progress = lower_discard(ir) || progress;
2989
2990	 progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
2991                                            options->MaxIfDepth) || progress;
2992
2993         progress = lower_noise(ir) || progress;
2994
2995	 /* If there are forms of indirect addressing that the driver
2996	  * cannot handle, perform the lowering pass.
2997	  */
2998	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
2999	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
3000	   progress =
3001	     lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
3002						 options->EmitNoIndirectInput,
3003						 options->EmitNoIndirectOutput,
3004						 options->EmitNoIndirectTemp,
3005						 options->EmitNoIndirectUniform)
3006	     || progress;
3007
3008	 progress = do_vec_index_to_cond_assign(ir) || progress;
3009         progress = lower_vector_insert(ir, true) || progress;
3010      } while (progress);
3011
3012      validate_ir_tree(ir);
3013   }
3014
3015   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
3016      struct gl_program *linked_prog;
3017
3018      if (prog->_LinkedShaders[i] == NULL)
3019	 continue;
3020
3021      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
3022
3023      if (linked_prog) {
3024         _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]);
3025
3026         if (!ctx->Driver.ProgramStringNotify(ctx,
3027                                              _mesa_shader_stage_to_program(i),
3028                                              linked_prog)) {
3029            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
3030                                    NULL);
3031            return GL_FALSE;
3032         }
3033      }
3034   }
3035
3036   build_program_resource_list(ctx, prog);
3037   return prog->data->LinkStatus;
3038}
3039
3040/**
3041 * Link a GLSL shader program.  Called via glLinkProgram().
3042 */
3043void
3044_mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3045{
3046   unsigned int i;
3047
3048   _mesa_clear_shader_program_data(ctx, prog);
3049
3050   prog->data->LinkStatus = GL_TRUE;
3051
3052   for (i = 0; i < prog->NumShaders; i++) {
3053      if (!prog->Shaders[i]->CompileStatus) {
3054	 linker_error(prog, "linking with uncompiled shader");
3055      }
3056   }
3057
3058   if (prog->data->LinkStatus) {
3059      link_shaders(ctx, prog);
3060   }
3061
3062   if (prog->data->LinkStatus) {
3063      if (!ctx->Driver.LinkShader(ctx, prog)) {
3064         prog->data->LinkStatus = GL_FALSE;
3065      }
3066   }
3067
3068   if (ctx->_Shader->Flags & GLSL_DUMP) {
3069      if (!prog->data->LinkStatus) {
3070	 fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name);
3071      }
3072
3073      if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) {
3074	 fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name);
3075         fprintf(stderr, "%s\n", prog->data->InfoLog);
3076      }
3077   }
3078}
3079
3080} /* extern "C" */
3081