1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file linker.cpp
26 * GLSL linker implementation
27 *
28 * Given a set of shaders that are to be linked to generate a final program,
29 * there are three distinct stages.
30 *
31 * In the first stage shaders are partitioned into groups based on the shader
32 * type.  All shaders of a particular type (e.g., vertex shaders) are linked
33 * together.
34 *
 *   - Undefined references in each shader are resolved to definitions in
36 *     another shader.
37 *   - Types and qualifiers of uniforms, outputs, and global variables defined
38 *     in multiple shaders with the same name are verified to be the same.
39 *   - Initializers for uniforms and global variables defined
40 *     in multiple shaders with the same name are verified to be the same.
41 *
42 * The result, in the terminology of the GLSL spec, is a set of shader
43 * executables for each processing unit.
44 *
45 * After the first stage is complete, a series of semantic checks are performed
46 * on each of the shader executables.
47 *
48 *   - Each shader executable must define a \c main function.
49 *   - Each vertex shader executable must write to \c gl_Position.
50 *   - Each fragment shader executable must write to either \c gl_FragData or
51 *     \c gl_FragColor.
52 *
53 * In the final stage individual shader executables are linked to create a
 * complete executable.
55 *
56 *   - Types of uniforms defined in multiple shader stages with the same name
57 *     are verified to be the same.
58 *   - Initializers for uniforms defined in multiple shader stages with the
59 *     same name are verified to be the same.
60 *   - Types and qualifiers of outputs defined in one stage are verified to
61 *     be the same as the types and qualifiers of inputs defined with the same
62 *     name in a later stage.
63 *
64 * \author Ian Romanick <ian.d.romanick@intel.com>
65 */
66#include <cstddef>
67#include <cstdlib>
68#include <cstdio>
69#include <cstdarg>
70#include <climits>
71
72#include <pixelflinger2/pixelflinger2_interface.h>
73
74extern "C" {
75#include <hieralloc.h>
76}
77
78#include "main/core.h"
79#include "glsl_symbol_table.h"
80#include "ir.h"
81#include "program.h"
82#include "program/hash_table.h"
83#include "linker.h"
84#include "ir_optimization.h"
85
86#include "main/shaderobj.h"
87
88/**
89 * Visitor that determines whether or not a variable is ever written.
90 */
91class find_assignment_visitor : public ir_hierarchical_visitor {
92public:
93   find_assignment_visitor(const char *name)
94      : name(name), found(false)
95   {
96      /* empty */
97   }
98
99   virtual ir_visitor_status visit_enter(ir_assignment *ir)
100   {
101      ir_variable *const var = ir->lhs->variable_referenced();
102
103      if (strcmp(name, var->name) == 0) {
104	 found = true;
105	 return visit_stop;
106      }
107
108      return visit_continue_with_parent;
109   }
110
111   using ir_hierarchical_visitor::visit_enter;
112   virtual ir_visitor_status visit_enter(ir_call *ir)
113   {
114      exec_list_iterator sig_iter = ir->get_callee()->parameters.iterator();
115      foreach_iter(exec_list_iterator, iter, *ir) {
116	 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
117	 ir_variable *sig_param = (ir_variable *)sig_iter.get();
118
119	 if (sig_param->mode == ir_var_out ||
120	     sig_param->mode == ir_var_inout) {
121	    ir_variable *var = param_rval->variable_referenced();
122	    if (var && strcmp(name, var->name) == 0) {
123	       found = true;
124	       return visit_stop;
125	    }
126	 }
127	 sig_iter.next();
128      }
129
130      return visit_continue_with_parent;
131   }
132
133   bool variable_found()
134   {
135      return found;
136   }
137
138private:
139   const char *name;       /**< Find writes to a variable with this name. */
140   bool found;             /**< Was a write to the variable found? */
141};
142
143
144/**
145 * Visitor that determines whether or not a variable is ever read.
146 */
147class find_deref_visitor : public ir_hierarchical_visitor {
148public:
149   find_deref_visitor(const char *name)
150      : name(name), found(false)
151   {
152      /* empty */
153   }
154
155   using ir_hierarchical_visitor::visit;
156   virtual ir_visitor_status visit(ir_dereference_variable *ir)
157   {
158      if (strcmp(this->name, ir->var->name) == 0) {
159	 this->found = true;
160	 return visit_stop;
161      }
162
163      return visit_continue;
164   }
165
166   bool variable_found() const
167   {
168      return this->found;
169   }
170
171private:
172   const char *name;       /**< Find writes to a variable with this name. */
173   bool found;             /**< Was a write to the variable found? */
174};
175
176
/**
 * Append a printf-formatted error message to the program's info log.
 *
 * The text "error: " is appended first, then the formatted message.  The
 * log buffer is grown in place via the hieralloc append helpers, so
 * \c prog->InfoLog may be reallocated by this call.
 *
 * \param prog Program whose \c InfoLog receives the message
 * \param fmt  printf-style format string for the message
 */
void
linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
{
   va_list ap;

   prog->InfoLog = hieralloc_strdup_append(prog->InfoLog, "error: ");
   va_start(ap, fmt);
   prog->InfoLog = hieralloc_vasprintf_append(prog->InfoLog, fmt, ap);
   va_end(ap);
}
187
188
189void
190invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode,
191			      int generic_base)
192{
193   foreach_list(node, sh->ir) {
194      ir_variable *const var = ((ir_instruction *) node)->as_variable();
195
196      if ((var == NULL) || (var->mode != (unsigned) mode))
197	 continue;
198
199      /* Only assign locations for generic attributes / varyings / etc.
200       */
201      if ((var->location >= generic_base) && !var->explicit_location)
202	  var->location = -1;
203   }
204}
205
206
207/**
208 * Determine the number of attribute slots required for a particular type
209 *
210 * This code is here because it implements the language rules of a specific
211 * GLSL version.  Since it's a property of the language and not a property of
212 * types in general, it doesn't really belong in glsl_type.
213 */
214unsigned
215count_attribute_slots(const glsl_type *t)
216{
217   /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
218    *
219    *     "A scalar input counts the same amount against this limit as a vec4,
220    *     so applications may want to consider packing groups of four
221    *     unrelated float inputs together into a vector to better utilize the
222    *     capabilities of the underlying hardware. A matrix input will use up
223    *     multiple locations.  The number of locations used will equal the
224    *     number of columns in the matrix."
225    *
226    * The spec does not explicitly say how arrays are counted.  However, it
227    * should be safe to assume the total number of slots consumed by an array
228    * is the number of entries in the array multiplied by the number of slots
229    * consumed by a single element of the array.
230    */
231
232   if (t->is_array())
233      return t->array_size() * count_attribute_slots(t->element_type());
234
235   if (t->is_matrix())
236      return t->matrix_columns;
237
238   return 1;
239}
240
241
242/**
243 * Verify that a vertex shader executable meets all semantic requirements
244 *
245 * \param shader  Vertex shader executable to be verified
246 */
247bool
248validate_vertex_shader_executable(struct gl_shader_program *prog,
249				  struct gl_shader *shader)
250{
251   if (shader == NULL)
252      return true;
253
254   find_assignment_visitor find("gl_Position");
255   find.run(shader->ir);
256   if (!find.variable_found()) {
257      linker_error_printf(prog,
258			  "vertex shader does not write to `gl_Position'\n");
259      return false;
260   }
261
262   return true;
263}
264
265
266/**
267 * Verify that a fragment shader executable meets all semantic requirements
268 *
269 * \param shader  Fragment shader executable to be verified
270 */
271bool
272validate_fragment_shader_executable(struct gl_shader_program *prog,
273				    struct gl_shader *shader)
274{
275   if (shader == NULL)
276      return true;
277
278   find_assignment_visitor frag_color("gl_FragColor");
279   find_assignment_visitor frag_data("gl_FragData");
280
281   frag_color.run(shader->ir);
282   frag_data.run(shader->ir);
283
284   if (frag_color.variable_found() && frag_data.variable_found()) {
285      linker_error_printf(prog,  "fragment shader writes to both "
286			  "`gl_FragColor' and `gl_FragData'\n");
287      return false;
288   }
289
290   return true;
291}
292
293
294/**
295 * Generate a string describing the mode of a variable
296 */
297static const char *
298mode_string(const ir_variable *var)
299{
300   switch (var->mode) {
301   case ir_var_auto:
302      return (var->read_only) ? "global constant" : "global variable";
303
304   case ir_var_uniform: return "uniform";
305   case ir_var_in:      return "shader input";
306   case ir_var_out:     return "shader output";
307   case ir_var_inout:   return "shader inout";
308
309   case ir_var_temporary:
310   default:
311      assert(!"Should not get here.");
312      return "invalid variable";
313   }
314}
315
316
/**
 * Perform validation of global variables used across multiple shaders
 *
 * Verifies that every global declared in more than one shader of the list
 * has a consistent type, explicit location, constant initializer, and
 * invariant qualifier across all of its declarations.
 *
 * \param prog          Program receiving error messages on failure
 * \param shader_list   Shaders whose globals are to be cross validated;
 *                      NULL entries are skipped
 * \param num_shaders   Number of entries in \c shader_list
 * \param uniforms_only If true, only uniforms are validated
 *
 * \return
 * \c true on success.  \c false if a mismatch was found; an error will
 * have been logged via \c linker_error_printf.
 */
bool
cross_validate_globals(struct gl_shader_program *prog,
		       struct gl_shader **shader_list,
		       unsigned num_shaders,
		       bool uniforms_only)
{
   /* Examine all of the uniforms in all of the shaders and cross validate
    * them.  The symbol table holds the first-seen declaration of each name.
    */
   glsl_symbol_table variables(prog);
   for (unsigned i = 0; i < num_shaders; i++) {
      if (shader_list[i] == NULL)
	 continue;

      foreach_list(node, shader_list[i]->ir) {
	 ir_variable *const var = ((ir_instruction *) node)->as_variable();

	 if (var == NULL)
	    continue;

	 if (uniforms_only && (var->mode != ir_var_uniform))
	    continue;

	 /* Don't cross validate temporaries that are at global scope.  These
	  * will eventually get pulled into the shaders 'main'.
	  */
	 if (var->mode == ir_var_temporary)
	    continue;

	 /* If a global with this name has already been seen, verify that the
	  * new instance has the same type.  In addition, if the globals have
	  * initializers, the values of the initializers must be the same.
	  */
	 ir_variable *const existing = variables.get_variable(var->name);
	 if (existing != NULL) {
	    if (var->type != existing->type) {
	       /* Consider the types to be "the same" if both types are arrays
		* of the same type and one of the arrays is implicitly sized.
		* In addition, set the type of the linked variable to the
		* explicitly sized array.
		*/
	       if (var->type->is_array()
		   && existing->type->is_array()
		   && (var->type->fields.array == existing->type->fields.array)
		   && ((var->type->length == 0)
		       || (existing->type->length == 0))) {
		  if (existing->type->length == 0) {
		     /* Adopt the explicit size and keep the largest array
		      * index seen so far.
		      */
		     existing->type = var->type;
		     existing->max_array_access =
			MAX2(existing->max_array_access,
			     var->max_array_access);
		  }
	       } else {
		  linker_error_printf(prog, "%s `%s' declared as type "
				      "`%s' and type `%s'\n",
				      mode_string(var),
				      var->name, var->type->name,
				      existing->type->name);
		  return false;
	       }
	    }

	    if (var->explicit_location) {
	       /* Two explicit locations must agree; an explicit location
		* always overrides an implicit one.
		*/
	       if (existing->explicit_location
		   && (var->location != existing->location)) {
		     linker_error_printf(prog, "explicit locations for %s "
					 "`%s' have differing values\n",
					 mode_string(var), var->name);
		     return false;
	       }

	       existing->location = var->location;
	       existing->explicit_location = true;
	    }

	    /* FINISHME: Handle non-constant initializers.
	     */
	    if (var->constant_value != NULL) {
	       if (existing->constant_value != NULL) {
		  if (!var->constant_value->has_value(existing->constant_value)) {
		     linker_error_printf(prog, "initializers for %s "
					 "`%s' have differing values\n",
					 mode_string(var), var->name);
		     return false;
		  }
	       } else
		  /* If the first-seen instance of a particular uniform did not
		   * have an initializer but a later instance does, copy the
		   * initializer to the version stored in the symbol table.
		   */
		  /* FINISHME: This is wrong.  The constant_value field should
		   * FINISHME: not be modified!  Imagine a case where a shader
		   * FINISHME: without an initializer is linked in two different
		   * FINISHME: programs with shaders that have differing
		   * FINISHME: initializers.  Linking with the first will
		   * FINISHME: modify the shader, and linking with the second
		   * FINISHME: will fail.
		   */
		  existing->constant_value =
		     var->constant_value->clone(hieralloc_parent(existing), NULL);
	    }

	    if (existing->invariant != var->invariant) {
	       linker_error_printf(prog, "declarations for %s `%s' have "
	                           "mismatching invariant qualifiers\n",
	                           mode_string(var), var->name);
	       return false;
	    }
	 } else
	    variables.add_variable(var);
      }
   }

   return true;
}
435
436
437/**
438 * Perform validation of uniforms used across multiple shader stages
439 */
440bool
441cross_validate_uniforms(struct gl_shader_program *prog)
442{
443   return cross_validate_globals(prog, prog->_LinkedShaders,
444				 MESA_SHADER_TYPES, true);
445}
446
447
/**
 * Validate that outputs from one stage match inputs of another
 *
 * Every input of the consumer stage that has a same-named output in the
 * producer stage must match it in type and in the centroid, invariant,
 * and interpolation qualifiers.
 *
 * \param prog     Program receiving error messages on failure
 * \param producer Earlier pipeline stage (its outputs are checked)
 * \param consumer Later pipeline stage (its inputs are checked)
 *
 * \return
 * \c true on success, \c false if a mismatch was found (an error will
 * have been logged via \c linker_error_printf).
 */
bool
cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
				 gl_shader *producer, gl_shader *consumer)
{
   glsl_symbol_table parameters(prog);
   /* FINISHME: Figure these out dynamically. */
   const char *const producer_stage = "vertex";
   const char *const consumer_stage = "fragment";

   /* Find all shader outputs in the "producer" stage.
    */
   foreach_list(node, producer->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();

      /* FINISHME: For geometry shaders, this should also look for inout
       * FINISHME: variables.
       */
      if ((var == NULL) || (var->mode != ir_var_out))
	 continue;

      parameters.add_variable(var);
   }


   /* Find all shader inputs in the "consumer" stage.  Any variables that have
    * matching outputs already in the symbol table must have the same type and
    * qualifiers.
    */
   foreach_list(node, consumer->ir) {
      ir_variable *const input = ((ir_instruction *) node)->as_variable();

      /* FINISHME: For geometry shaders, this should also look for inout
       * FINISHME: variables.
       */
      if ((input == NULL) || (input->mode != ir_var_in))
	 continue;

      ir_variable *const output = parameters.get_variable(input->name);
      if (output != NULL) {
	 /* Check that the types match between stages.
	  */
	 if (input->type != output->type) {
	    /* There is a bit of a special case for gl_TexCoord.  This
	     * built-in is unsized by default.  Applications that access
	     * it must redeclare it with a size.  There is some
	     * language in the GLSL spec that implies the fragment shader
	     * and vertex shader do not have to agree on this size.  Other
	     * drivers behave this way, and one or two applications seem to
	     * rely on it.
	     *
	     * Neither declaration needs to be modified here because the array
	     * sizes are fixed later when update_array_sizes is called.
	     *
	     * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
	     *
	     *     "Unlike user-defined varying variables, the built-in
	     *     varying variables don't have a strict one-to-one
	     *     correspondence between the vertex language and the
	     *     fragment language."
	     */
	    if (!output->type->is_array()
		|| (strncmp("gl_", output->name, 3) != 0)) {
	       linker_error_printf(prog,
				   "%s shader output `%s' declared as "
				   "type `%s', but %s shader input declared "
				   "as type `%s'\n",
				   producer_stage, output->name,
				   output->type->name,
				   consumer_stage, input->type->name);
	       return false;
	    }
	 }

	 /* Check that all of the qualifiers match between stages.
	  */
	 if (input->centroid != output->centroid) {
	    linker_error_printf(prog,
				"%s shader output `%s' %s centroid qualifier, "
				"but %s shader input %s centroid qualifier\n",
				producer_stage,
				output->name,
				(output->centroid) ? "has" : "lacks",
				consumer_stage,
				(input->centroid) ? "has" : "lacks");
	    return false;
	 }

	 if (input->invariant != output->invariant) {
	    linker_error_printf(prog,
				"%s shader output `%s' %s invariant qualifier, "
				"but %s shader input %s invariant qualifier\n",
				producer_stage,
				output->name,
				(output->invariant) ? "has" : "lacks",
				consumer_stage,
				(input->invariant) ? "has" : "lacks");
	    return false;
	 }

	 if (input->interpolation != output->interpolation) {
	    linker_error_printf(prog,
				"%s shader output `%s' specifies %s "
				"interpolation qualifier, "
				"but %s shader input specifies %s "
				"interpolation qualifier\n",
				producer_stage,
				output->name,
				output->interpolation_string(),
				consumer_stage,
				input->interpolation_string());
	    return false;
	 }
      }
   }

   return true;
}
568
569
570/**
571 * Populates a shaders symbol table with all global declarations
572 */
573static void
574populate_symbol_table(gl_shader *sh)
575{
576   sh->symbols = new(sh) glsl_symbol_table(sh);
577
578   foreach_list(node, sh->ir) {
579      ir_instruction *const inst = (ir_instruction *) node;
580      ir_variable *var;
581      ir_function *func;
582
583      if ((func = inst->as_function()) != NULL) {
584	 sh->symbols->add_function(func);
585      } else if ((var = inst->as_variable()) != NULL) {
586	 sh->symbols->add_variable(var);
587      }
588   }
589}
590
591
/**
 * Remap variables referenced in an instruction tree
 *
 * This is used when instruction trees are cloned from one shader and placed in
 * another.  These trees will contain references to \c ir_variable nodes that
 * do not exist in the target shader.  This function finds these \c ir_variable
 * references and replaces the references with matching variables in the target
 * shader.
 *
 * If there is no matching variable in the target shader, a clone of the
 * \c ir_variable is made and added to the target shader.  The new variable is
 * added to \b both the instruction stream and the symbol table.
 *
 * \param inst         IR tree that is to be processed.
 * \param target       Shader whose symbol table and instruction stream
 *                     supply (and receive) the replacement variables.
 * \param temps        Map from original temporary variables to their
 *                     already-cloned counterparts in the target shader.
 */
void
remap_variables(ir_instruction *inst, struct gl_shader *target,
		hash_table *temps)
{
   class remap_visitor : public ir_hierarchical_visitor {
   public:
	 remap_visitor(struct gl_shader *target,
		    hash_table *temps)
      {
	 this->target = target;
	 this->symbols = target->symbols;
	 this->instructions = target->ir;
	 this->temps = temps;
      }

      using ir_hierarchical_visitor::visit;
      virtual ir_visitor_status visit(ir_dereference_variable *ir)
      {
	 /* Temporaries are never in the symbol table; they must already
	  * have been cloned and recorded in the temps map by the caller.
	  */
	 if (ir->var->mode == ir_var_temporary) {
	    ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var);

	    assert(var != NULL);
	    ir->var = var;
	    return visit_continue;
	 }

	 /* Non-temporaries: reuse the target's existing global of the same
	  * name, or clone this one into the target's stream and symbols.
	  */
	 ir_variable *const existing =
	    this->symbols->get_variable(ir->var->name);
	 if (existing != NULL)
	    ir->var = existing;
	 else {
	    ir_variable *copy = ir->var->clone(this->target, NULL);

	    this->symbols->add_variable(copy);
	    this->instructions->push_head(copy);
	    ir->var = copy;
	 }

	 return visit_continue;
      }

   private:
      struct gl_shader *target;       /**< Shader receiving cloned variables. */
      glsl_symbol_table *symbols;     /**< Target's global symbol table. */
      exec_list *instructions;        /**< Target's IR stream. */
      hash_table *temps;              /**< Original temp -> cloned temp map. */
   };

   remap_visitor v(target, temps);

   inst->accept(&v);
}
663
664
/**
 * Move non-declarations from one instruction stream to another
 *
 * The intended usage pattern of this function is to pass the pointer to the
 * head sentinel of a list (i.e., a pointer to the list cast to an \c exec_node
 * pointer) for \c last and \c false for \c make_copies on the first
 * call.  Successive calls pass the return value of the previous call for
 * \c last and \c true for \c make_copies.
 *
 * \param instructions Source instruction stream
 * \param last         Instruction after which new instructions should be
 *                     inserted in the target instruction stream
 * \param make_copies  Flag selecting whether instructions in \c instructions
 *                     should be copied (via \c ir_instruction::clone) into the
 *                     target list or moved.
 * \param target       Shader that owns the clones when \c make_copies is set
 *
 * \return
 * The new "last" instruction in the target instruction stream.  This pointer
 * is suitable for use as the \c last parameter of a later call to this
 * function.
 */
exec_node *
move_non_declarations(exec_list *instructions, exec_node *last,
		      bool make_copies, gl_shader *target)
{
   hash_table *temps = NULL;

   /* When copying, cloned temporaries must be tracked so that later
    * references to the originals can be remapped to the clones.
    */
   if (make_copies)
      temps = hash_table_ctor(0, hash_table_pointer_hash,
			      hash_table_pointer_compare);

   foreach_list_safe(node, instructions) {
      ir_instruction *inst = (ir_instruction *) node;

      /* Function definitions stay where they are. */
      if (inst->as_function())
	 continue;

      /* Non-temporary variable declarations also stay in place. */
      ir_variable *var = inst->as_variable();
      if ((var != NULL) && (var->mode != ir_var_temporary))
	 continue;

      /* Only global assignments and global temporaries reach this point. */
      assert(inst->as_assignment()
	     || ((var != NULL) && (var->mode == ir_var_temporary)));

      if (make_copies) {
	 inst = inst->clone(target, NULL);

	 if (var != NULL)
	    hash_table_insert(temps, inst, var);
	 else
	    remap_variables(inst, target, temps);
      } else {
	 /* Moving: detach from the source stream before re-insertion. */
	 inst->remove();
      }

      last->insert_after(inst);
      last = inst;
   }

   if (make_copies)
      hash_table_dtor(temps);

   return last;
}
729
730/**
731 * Get the function signature for main from a shader
732 */
733static ir_function_signature *
734get_main_function_signature(gl_shader *sh)
735{
736   ir_function *const f = sh->symbols->get_function("main");
737   if (f != NULL) {
738      exec_list void_parameters;
739
740      /* Look for the 'void main()' signature and ensure that it's defined.
741       * This keeps the linker from accidentally pick a shader that just
742       * contains a prototype for main.
743       *
744       * We don't have to check for multiple definitions of main (in multiple
745       * shaders) because that would have already been caught above.
746       */
747      ir_function_signature *sig = f->matching_signature(&void_parameters);
748      if ((sig != NULL) && sig->is_defined) {
749	 return sig;
750      }
751   }
752
753   return NULL;
754}
755
756
757/**
758 * Combine a group of shaders for a single stage to generate a linked shader
759 *
760 * \note
761 * If this function is supplied a single shader, it is cloned, and the new
762 * shader is returned.
763 */
764static struct gl_shader *
765link_intrastage_shaders(void *mem_ctx,
766			const struct gl_context *ctx,
767			struct gl_shader_program *prog,
768			struct gl_shader **shader_list,
769			unsigned num_shaders)
770{
771   /* Check that global variables defined in multiple shaders are consistent.
772    */
773   if (!cross_validate_globals(prog, shader_list, num_shaders, false))
774      return NULL;
775
776   /* Check that there is only a single definition of each function signature
777    * across all shaders.
778    */
779   for (unsigned i = 0; i < (num_shaders - 1); i++) {
780      foreach_list(node, shader_list[i]->ir) {
781	 ir_function *const f = ((ir_instruction *) node)->as_function();
782
783	 if (f == NULL)
784	    continue;
785
786	 for (unsigned j = i + 1; j < num_shaders; j++) {
787	    ir_function *const other =
788	       shader_list[j]->symbols->get_function(f->name);
789
790	    /* If the other shader has no function (and therefore no function
791	     * signatures) with the same name, skip to the next shader.
792	     */
793	    if (other == NULL)
794	       continue;
795
796	    foreach_iter (exec_list_iterator, iter, *f) {
797	       ir_function_signature *sig =
798		  (ir_function_signature *) iter.get();
799
800	       if (!sig->is_defined || sig->is_builtin)
801		  continue;
802
803	       ir_function_signature *other_sig =
804		  other->exact_matching_signature(& sig->parameters);
805
806	       if ((other_sig != NULL) && other_sig->is_defined
807		   && !other_sig->is_builtin) {
808		  linker_error_printf(prog,
809				      "function `%s' is multiply defined",
810				      f->name);
811		  return NULL;
812	       }
813	    }
814	 }
815      }
816   }
817
818   /* Find the shader that defines main, and make a clone of it.
819    *
820    * Starting with the clone, search for undefined references.  If one is
821    * found, find the shader that defines it.  Clone the reference and add
822    * it to the shader.  Repeat until there are no undefined references or
823    * until a reference cannot be resolved.
824    */
825   gl_shader *main = NULL;
826   for (unsigned i = 0; i < num_shaders; i++) {
827      if (get_main_function_signature(shader_list[i]) != NULL) {
828	 main = shader_list[i];
829	 break;
830      }
831   }
832
833   if (main == NULL) {
834      linker_error_printf(prog, "%s shader lacks `main'\n",
835			  (shader_list[0]->Type == GL_VERTEX_SHADER)
836			  ? "vertex" : "fragment");
837      return NULL;
838   }
839
840   gl_shader *linked = _mesa_new_shader(prog, 0, main->Type);
841   linked->ir = new(linked) exec_list;
842   clone_ir_list(mem_ctx, linked->ir, main->ir);
843
844   populate_symbol_table(linked);
845
846   /* The a pointer to the main function in the final linked shader (i.e., the
847    * copy of the original shader that contained the main function).
848    */
849   ir_function_signature *const main_sig = get_main_function_signature(linked);
850
851   /* Move any instructions other than variable declarations or function
852    * declarations into main.
853    */
854   exec_node *insertion_point =
855      move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false,
856			    linked);
857
858   for (unsigned i = 0; i < num_shaders; i++) {
859      if (shader_list[i] == main)
860	 continue;
861
862      insertion_point = move_non_declarations(shader_list[i]->ir,
863					      insertion_point, true, linked);
864   }
865
866   /* Resolve initializers for global variables in the linked shader.
867    */
868   unsigned num_linking_shaders = num_shaders;
869   for (unsigned i = 0; i < num_shaders; i++)
870      num_linking_shaders += shader_list[i]->num_builtins_to_link;
871
872   gl_shader **linking_shaders =
873      (gl_shader **) calloc(num_linking_shaders, sizeof(gl_shader *));
874
875   memcpy(linking_shaders, shader_list,
876	  sizeof(linking_shaders[0]) * num_shaders);
877
878   unsigned idx = num_shaders;
879   for (unsigned i = 0; i < num_shaders; i++) {
880      memcpy(&linking_shaders[idx], shader_list[i]->builtins_to_link,
881	     sizeof(linking_shaders[0]) * shader_list[i]->num_builtins_to_link);
882      idx += shader_list[i]->num_builtins_to_link;
883   }
884
885   assert(idx == num_linking_shaders);
886
887   if (!link_function_calls(prog, linked, linking_shaders,
888			    num_linking_shaders)) {
889      _mesa_delete_shader(ctx, linked);
890      linked = NULL;
891   }
892
893   free(linking_shaders);
894
895   /* Make a pass over all global variables to ensure that arrays with
896    * unspecified sizes have a size specified.  The size is inferred from the
897    * max_array_access field.
898    */
899   if (linked != NULL) {
900      foreach_list(node, linked->ir) {
901	 ir_variable *const var = ((ir_instruction *) node)->as_variable();
902
903	 if (var == NULL)
904	    continue;
905
906	 if ((var->mode != ir_var_auto) && (var->mode != ir_var_temporary))
907	    continue;
908
909	 if (!var->type->is_array() || (var->type->length != 0))
910	    continue;
911
912	 const glsl_type *type =
913	    glsl_type::get_array_instance(var->type->fields.array,
914					  var->max_array_access);
915
916	 assert(type != NULL);
917	 var->type = type;
918      }
919   }
920
921   return linked;
922}
923
924
/**
 * List node wrapping a single active uniform while uniform storage is
 * being assigned (see \c add_uniform below).
 */
struct uniform_node {
   exec_node link;        /**< Linkage into the list of active uniforms. */
   struct gl_uniform *u;  /**< The uniform described by this node. */
   unsigned slots;        /**< Storage slot count — presumably vec4 slots used
                           *   by the uniform; confirm against add_uniform. */
};
930
931/**
932 * Update the sizes of linked shader uniform arrays to the maximum
933 * array index used.
934 *
935 * From page 81 (page 95 of the PDF) of the OpenGL 2.1 spec:
936 *
937 *     If one or more elements of an array are active,
938 *     GetActiveUniform will return the name of the array in name,
939 *     subject to the restrictions listed above. The type of the array
940 *     is returned in type. The size parameter contains the highest
941 *     array element index used, plus one. The compiler or linker
942 *     determines the highest index used.  There will be only one
943 *     active uniform reported by the GL per uniform array.
944
945 */
946static void
947update_array_sizes(struct gl_shader_program *prog)
948{
949   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
950	 if (prog->_LinkedShaders[i] == NULL)
951	    continue;
952
953      foreach_list(node, prog->_LinkedShaders[i]->ir) {
954	 ir_variable *const var = ((ir_instruction *) node)->as_variable();
955
956	 if ((var == NULL) || (var->mode != ir_var_uniform &&
957			       var->mode != ir_var_in &&
958			       var->mode != ir_var_out) ||
959	     !var->type->is_array())
960	    continue;
961
962	 unsigned int size = var->max_array_access;
963	 for (unsigned j = 0; j < MESA_SHADER_TYPES; j++) {
964	       if (prog->_LinkedShaders[j] == NULL)
965		  continue;
966
967	    foreach_list(node2, prog->_LinkedShaders[j]->ir) {
968	       ir_variable *other_var = ((ir_instruction *) node2)->as_variable();
969	       if (!other_var)
970		  continue;
971
972	       if (strcmp(var->name, other_var->name) == 0 &&
973		   other_var->max_array_access > size) {
974		  size = other_var->max_array_access;
975	       }
976	    }
977	 }
978
979	 if (size + 1 != var->type->fields.array->length) {
980	    var->type = glsl_type::get_array_instance(var->type->fields.array,
981						      size + 1);
982	    /* FINISHME: We should update the types of array
983	     * dereferences of this variable now.
984	     */
985	 }
986      }
987   }
988}
989
/**
 * Recursively assign a location to one uniform, flattening structures and
 * arrays of structures into individually named leaf uniforms.
 *
 * Records recurse as "name.field" and arrays of records as "name[i]"; the
 * location of the first leaf is returned.  Leaf uniforms are deduplicated
 * by name through \c ht, so a uniform shared between stages gets a single
 * position.  Samplers (and sampler arrays) are numbered from the separate
 * *next_sampler_pos counter; all other uniforms advance *next_shader_pos
 * by their size in vec4 slots.
 *
 * \return the position assigned to \c name, or -1 for an empty record.
 */
static int // returns location assigned
add_uniform(void *mem_ctx, exec_list *uniforms, struct hash_table *ht,
	    const char *name, const glsl_type *type, GLenum shader_type,
	    unsigned *next_shader_pos, unsigned *total_uniforms, unsigned *next_sampler_pos, unsigned * samplers_used)
{
   int index = -1;
   if (type->is_record()) {
      /* Flatten each structure field as "name.field". */
      for (unsigned int i = 0; i < type->length; i++) {
         const glsl_type *field_type = type->fields.structure[i].type;
         char *field_name = hieralloc_asprintf(mem_ctx, "%s.%s", name,
					    type->fields.structure[i].name);

         int firstIndex = add_uniform(mem_ctx, uniforms, ht, field_name, field_type,
            shader_type, next_shader_pos, total_uniforms, next_sampler_pos, samplers_used);
         if (i == 0)
            index = firstIndex;
      }
   } else {
      uniform_node *n = (uniform_node *) hash_table_find(ht, name);
      unsigned int vec4_slots;
      const glsl_type *array_elem_type = NULL;

      if (type->is_array()) {
         array_elem_type = type->fields.array;
         /* Array of structures. */
         if (array_elem_type->is_record()) {
            /* Flatten each element as "name[i]" and recurse; the element
             * recursion handles the per-field flattening.
             */
            for (unsigned int i = 0; i < type->length; i++) {
               char *elem_name = hieralloc_asprintf(mem_ctx, "%s[%d]", name, i);
               int firstIndex = add_uniform(mem_ctx, uniforms, ht, elem_name, array_elem_type,
                  shader_type, next_shader_pos, total_uniforms, next_sampler_pos, samplers_used);
               if (i == 0)
                  index = firstIndex;
            }
            return index;
         }
      }

      /* Fix the storage size of samplers at 1 vec4 each. Be sure to pad out
       * vectors to vec4 slots.
       */
      if (type->is_array()) {
         if (array_elem_type->is_sampler())
            vec4_slots = type->length;
         else
            vec4_slots = type->length * array_elem_type->matrix_columns;
      } else if (type->is_sampler())
         vec4_slots = 1;
      else
         vec4_slots = type->matrix_columns;

      if (n == NULL) {
         /* First time this name is seen: create a node and assign a
          * position from the sampler counter or the general counter.
          * NOTE(review): calloc/strdup results are not checked for NULL —
          * confirm OOM policy; the node's Name is freed only after being
          * handed off to the flat uniform list.
          */
         n = (uniform_node *) calloc(1, sizeof(struct uniform_node));
         n->u = (gl_uniform *) calloc(1, sizeof(struct gl_uniform));
         n->slots = vec4_slots;

         n->u->Name = strdup(name);
         n->u->Type = type;
         n->u->Pos = *next_shader_pos;
         (*total_uniforms)++;

         if (type->is_sampler() || (array_elem_type && array_elem_type->is_sampler()))
         {
            n->u->Pos = *next_sampler_pos;
            *next_sampler_pos += vec4_slots;
         }
         else
            (*next_shader_pos) += vec4_slots;
         /* NOTE(review): the table keys on `name`, which for nested
          * fields is hieralloc'd from mem_ctx — the table must be
          * destroyed before mem_ctx is freed (assign_uniform_locations
          * does so).
          */
         hash_table_insert(ht, n, name);
         uniforms->push_tail(&n->link);
      }

      if (type->is_sampler() || (array_elem_type && array_elem_type->is_sampler()))
         /* NOTE(review): only the first slot of a sampler array is
          * flagged in *samplers_used — confirm consumers don't expect
          * one bit per element.
          */
         (*samplers_used) |= 1 << n->u->Pos;
      index = n->u->Pos;
   }
   return index;
}
1067
/**
 * Assign locations to every non-built-in uniform in all linked shaders
 * and build the flat prog->Uniforms list.
 *
 * All stages share a single uniform name space, so a uniform used by
 * several stages receives one location.  Sampler uniforms draw their
 * positions from a separate counter shared across stages.  On return,
 * prog->Uniforms->Slots / SamplerSlots record the total storage needed.
 */
void
assign_uniform_locations(struct gl_shader_program *prog)
{
   /* Scratch list and name->node table of uniforms seen so far. */
   exec_list uniforms;
   unsigned total_uniforms = 0;
   unsigned next_sampler_pos = 0; // all shaders in prog share same sampler location
   hash_table *ht = hash_table_ctor(32, hash_table_string_hash,
				    hash_table_string_compare);
   void *mem_ctx = hieralloc_new(prog);

   unsigned next_position = 0; // also number of slots for uniforms

   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
	 continue;

      prog->_LinkedShaders[i]->SamplersUsed = 0;
      foreach_list(node, prog->_LinkedShaders[i]->ir) {
	 ir_variable *const var = ((ir_instruction *) node)->as_variable();

	 if ((var == NULL) || (var->mode != ir_var_uniform))
	    continue;

	 if (strncmp(var->name, "gl_", 3) == 0) {
	    /* At the moment, we don't allocate uniform locations for
	     * builtin uniforms.  It's permitted by spec, and we'll
	     * likely switch to doing that at some point, but not yet.
	     */
	    continue;
	 }

	 var->location = add_uniform(mem_ctx, &uniforms, ht, var->name, var->type,
		     prog->_LinkedShaders[i]->Type,
		     &next_position, &total_uniforms, &next_sampler_pos, &prog->_LinkedShaders[i]->SamplersUsed);
      }
   }

   /* Flatten the accumulated nodes into a gl_uniform_list owned by prog. */
   gl_uniform_list *ul = hieralloc_zero(prog, gl_uniform_list);

   ul->Size = total_uniforms;
   ul->NumUniforms = total_uniforms;
   ul->Uniforms = (gl_uniform *)hieralloc_zero_size(ul, total_uniforms * sizeof(gl_uniform));

   unsigned idx = 0;
   uniform_node *next;
   /* The exec_list tail sentinel has link.next == NULL, so this visits
    * every real node exactly once.  The memcpy transfers ownership of
    * u->Name (a strdup'd string) to the flat array before the node and
    * its gl_uniform shell are freed.
    */
   for (uniform_node *node = (uniform_node *) uniforms.head
	   ; node->link.next != NULL
	   ; node = next) {
      next = (uniform_node *) node->link.next;

      node->link.remove();
      memcpy(&ul->Uniforms[idx], node->u, sizeof(gl_uniform));
      idx++;

      free(node->u);
      free(node);
   }

   /* Destroy the table before mem_ctx: its keys are hieralloc'd names. */
   hash_table_dtor(ht);

   prog->Uniforms = ul;
   prog->Uniforms->Slots = next_position;
   prog->Uniforms->SamplerSlots = next_sampler_pos;

   hieralloc_free(mem_ctx);
}
1135
1136
1137/**
1138 * Find a contiguous set of available bits in a bitmask
1139 *
1140 * \param used_mask     Bits representing used (1) and unused (0) locations
1141 * \param needed_count  Number of contiguous bits needed.
1142 *
1143 * \return
1144 * Base location of the available bits on success or -1 on failure.
1145 */
int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;

   /* The comparison to 32 is redundant, but without it GCC emits "warning:
    * cannot optimize possibly infinite loops" for the loop below.
    */
   if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
      return -1;

   /* Build the mask with an unsigned literal: with a signed "1",
    * needed_count == 31 overflows into the sign bit and needed_count ==
    * 32 shifts by the full type width — both undefined behavior.  The
    * full-width request gets the all-ones mask directly.
    */
   unsigned needed_mask = (needed_count >= 8 * sizeof(used_mask))
      ? ~0u : (1u << needed_count) - 1;

   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
	 return i;

      needed_mask <<= 1;
   }

   return -1;
}
1167
1168
/**
 * Assign locations to vertex shader inputs (attributes).
 *
 * Locations fixed by glBindAttribLocation (prog->Attributes) and by
 * explicit layout qualifiers are honored first; the remaining attributes
 * are then packed into free slots, largest first, to limit fragmentation.
 *
 * \param max_attribute_index  Number of attribute slots available
 *                             (implementation's GL_MAX_VERTEX_ATTRIBS).
 *
 * \return false (with a link error appended to the info log) if the
 *         attributes cannot all be placed; true otherwise.
 */
bool
assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index)
{
   /* Mark invalid attribute locations as being used.
    */
   unsigned used_locations = (max_attribute_index >= 32)
      ? ~0 : ~((1 << max_attribute_index) - 1);

   /* NOTE(review): assumes _LinkedShaders[0] is the vertex shader (the
    * assert below checks it) — confirm index 0 == MESA_SHADER_VERTEX.
    */
   gl_shader *const sh = prog->_LinkedShaders[0];
   assert(sh->Type == GL_VERTEX_SHADER);
   prog->VaryingSlots = 0;
   /* Operate in a total of four passes.
    *
    * 1. Invalidate the location assignments for all vertex shader inputs,
    *    except for explicit_location and glBindAttribLocation
    *
    * 2. Assign locations for inputs that have user-defined (via
    *    glBindVertexAttribLocation) locatoins.
    *
    * 3. Sort the attributes without assigned locations by number of slots
    *    required in decreasing order.  Fragmentation caused by attribute
    *    locations assigned by the application may prevent large attributes
    *    from having enough contiguous space.
    *
    * 4. Assign locations to any inputs without assigned locations.
    */
   if (prog->Attributes != NULL) {
      // declare attributes if they haven't been already by BindAttribLocation
      gl_program_parameter_list * attributes = prog->Attributes;
         foreach_list(node, sh->ir) {
            ir_variable *const var = ((ir_instruction *) node)->as_variable();
            if ((var == NULL) || (var->mode != ir_var_in))
               continue;
            if (_mesa_get_parameter(attributes, var->name) < 0)
                _mesa_add_parameter(attributes, var->name);
         }

      /* Apply any user bindings and detect overlapping assignments. */
      for (unsigned i = 0; i < attributes->NumParameters; i++) {
         gl_program_parameter * param = attributes->Parameters + i;
         ir_variable * const var = sh->symbols->get_variable(param->Name);
         if (!var || ir_var_in != var->mode)
            continue;

         /* A glBindAttribLocation binding wins unless the shader used an
          * explicit layout location; otherwise the location is cleared so
          * the packing pass below assigns one.
          */
         if (param->BindLocation >= 0 && !var->explicit_location)
            var->location = param->Location = param->BindLocation;
         else if (var->explicit_location)
            param->Location = var->location;
         else
            var->location = -1;
         const unsigned slots = count_attribute_slots(var->type);
         param->Slots = slots;
         if (0 > var->location)
            continue;
 	 /* From page 61 of the OpenGL 4.0 spec:
	  *
	  *     "LinkProgram will fail if the attribute bindings assigned by
	  *     BindAttribLocation do not leave not enough space to assign a
	  *     location for an active matrix attribute or an active attribute
	  *     array, both of which require multiple contiguous generic
	  *     attributes."
	  *
	  * Previous versions of the spec contain similar language but omit the
	  * bit about attribute arrays.
	  *
	  * Page 61 of the OpenGL 4.0 spec also says:
	  *
	  *     "It is possible for an application to bind more than one
	  *     attribute name to the same location. This is referred to as
	  *     aliasing. This will only work if only one of the aliased
	  *     attributes is active in the executable program, or if no path
	  *     through the shader consumes more than one attribute of a set
	  *     of attributes aliased to the same location. A link error can
	  *     occur if the linker determines that every path through the
	  *     shader consumes multiple aliased attributes, but
	  *     implementations are not required to generate an error in this
	  *     case."
	  *
	  * These two paragraphs are either somewhat contradictory, or I don't
	  * fully understand one or both of them.
	  */
	 /* FINISHME: The code as currently written does not support attribute
	  * FINISHME: location aliasing (see comment above).
	  */
         const int attr = param->Location;
	 /* Mask representing the contiguous slots that will be used by this
	  * attribute.
	  */
	 const unsigned use_mask = (1 << slots) - 1;
	 /* Generate a link error if the set of bits requested for this
	  * attribute overlaps any previously allocated bits.
	  */
	 if ((use_mask << attr) & used_locations) {
	    linker_error_printf(prog,
				"insufficient contiguous attribute locations "
				"available for vertex shader input `%s'",
				var->name);
	    return false;
	 }

	 used_locations |= (use_mask << attr);
      }
   }

   /* Temporary storage for the set of attributes that need locations assigned.
    */
   struct temp_attr {
      unsigned slots;
      ir_variable *var;

      /* Used below in the call to qsort. */
      static int compare(const void *a, const void *b)
      {
	 const temp_attr *const l = (const temp_attr *) a;
	 const temp_attr *const r = (const temp_attr *) b;

	 /* Reversed because we want a descending order sort below. */
	 return r->slots - l->slots;
      }
   } to_assign[16];
   /* NOTE(review): fixed capacity of 16 with no overflow check — confirm
    * the number of unassigned inputs can never exceed it.
    */

   unsigned num_attr = 0;

   /* Collect the inputs still lacking a location; validate and reserve
    * explicit-location inputs along the way.
    */
   foreach_list(node, sh->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();
      if ((var == NULL) || (var->mode != ir_var_in))
         continue;
      if (var->explicit_location) {
	 const unsigned slots = count_attribute_slots(var->type);
	 const unsigned use_mask = (1 << slots) - 1;
	 const int attr = var->location/* - VERT_ATTRIB_GENERIC0*/;

	 if ((var->location >= (int)(max_attribute_index/* + VERT_ATTRIB_GENERIC0*/))
	     || (var->location < 0)) {
	    linker_error_printf(prog,
				"invalid explicit location %d specified for "
				"`%s'\n",
				(var->location < 0) ? var->location : attr,
				var->name);
	    return false;
	 } else if (var->location >= 0/*VERT_ATTRIB_GENERIC0*/) {
	    used_locations |= (use_mask << attr);
	 }
      }

      /* The location was explicitly assigned, nothing to do here.
       */
      if (var->location != -1)
	 continue;

      to_assign[num_attr].slots = count_attribute_slots(var->type);
      to_assign[num_attr].var = var;
      num_attr++;
   }

   /* If all of the attributes were assigned locations by the application (or
    * are built-in attributes with fixed locations), return early.  This should
    * be the common case.
    */
   if (num_attr == 0)
      return true;

   qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);

   /* VERT_ATTRIB_GENERIC0 is a psdueo-alias for VERT_ATTRIB_POS.  It can only
    * be explicitly assigned by via glBindAttribLocation.  Mark it as reserved
    * to prevent it from being automatically allocated below.
    */
   find_deref_visitor find("gl_Vertex");
   find.run(sh->ir);
   if (find.variable_found())
      used_locations |= (1 << 0);

   /* Pack the remaining attributes, largest first, into free slot runs. */
   for (unsigned i = 0; i < num_attr; i++) {
      /* Mask representing the contiguous slots that will be used by this
       * attribute.
       */
      const unsigned use_mask = (1 << to_assign[i].slots) - 1;

      int location = find_available_slots(used_locations, to_assign[i].slots);

      if (location < 0) {
	 linker_error_printf(prog,
			     "insufficient contiguous attribute locations "
			     "available for vertex shader input `%s'",
			     to_assign[i].var->name);
	 return false;
      }

      to_assign[i].var->location = /*VERT_ATTRIB_GENERIC0 +*/ location;
      used_locations |= (use_mask << location);
      /* Mirror the assigned location into prog->Attributes so queries see it. */
      int paramIndex = _mesa_get_parameter(prog->Attributes, to_assign[i].var->name);
      if (0 <= paramIndex)
         prog->Attributes->Parameters[paramIndex].Location = location;
   }

   return true;
}
1366
1367
1368/**
1369 * Demote shader inputs and outputs that are not used in other stages
1370 */
1371void
1372demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode)
1373{
1374   foreach_list(node, sh->ir) {
1375      ir_variable *const var = ((ir_instruction *) node)->as_variable();
1376
1377      if ((var == NULL) || (var->mode != int(mode)))
1378	 continue;
1379
1380      /* A shader 'in' or 'out' variable is only really an input or output if
1381       * its value is used by other shader stages.  This will cause the variable
1382       * to have a location assigned.
1383       */
1384      if (var->location == -1) {
1385	 var->mode = ir_var_auto;
1386      }
1387   }
1388}
1389
1390void
1391assign_varying_locations(struct gl_shader_program *prog,
1392			 gl_shader *producer, gl_shader *consumer)
1393{
1394   prog->VaryingSlots = 0;
1395   prog->UsesFragCoord = false;
1396   prog->UsesPointCoord = false;
1397   /* FINISHME: Set dynamically when geometry shader support is added. */
1398   unsigned output_index = offsetof(VertexOutput,varyings) / sizeof(Vector4); /*VERT_RESULT_VAR0*/;
1399   unsigned input_index = offsetof(VertexOutput,varyings) / sizeof(Vector4);
1400
1401   /* Operate in a total of three passes.
1402    *
1403    * 1. Assign locations for any matching inputs and outputs.
1404    *
1405    * 2. Mark output variables in the producer that do not have locations as
1406    *    not being outputs.  This lets the optimizer eliminate them.
1407    *
1408    * 3. Mark input variables in the consumer that do not have locations as
1409    *    not being inputs.  This lets the optimizer eliminate them.
1410    */
1411   foreach_list(node, producer->ir) {
1412      ir_variable *const var = ((ir_instruction *) node)->as_variable();
1413      if (!var || ir_var_out != var->mode)
1414         continue;
1415      if (!strcmp("gl_Position", var->name))
1416         var->location = offsetof(VertexOutput,position) / sizeof(Vector4);
1417      else if (!strcmp("gl_PointSize", var->name))
1418         var->location = offsetof(VertexOutput,pointSize) / sizeof(Vector4);
1419      else
1420         var->location = -1;
1421   }
1422   foreach_list(node, consumer->ir) {
1423      ir_variable *const var = ((ir_instruction *) node)->as_variable();
1424      if (!var || ir_var_in != var->mode)
1425         continue;
1426      if (!strcmp("gl_FragCoord", var->name))
1427      {
1428         var->location = offsetof(VertexOutput,position)/sizeof(Vector4);
1429         prog->UsesFragCoord = true;
1430      }
1431      else if (!strcmp("gl_FrontFacing", var->name))
1432         var->location = offsetof(VertexOutput,frontFacingPointCoord)/sizeof(Vector4);
1433      else if (!strcmp("gl_PointCoord", var->name))
1434      {
1435         var->location = offsetof(VertexOutput,frontFacingPointCoord)/sizeof(Vector4);
1436         prog->UsesPointCoord = true;
1437      }
1438      else
1439         var->location = -1;
1440   }
1441
1442   foreach_list(node, producer->ir) {
1443      ir_variable *const output_var = ((ir_instruction *) node)->as_variable();
1444
1445      if ((output_var == NULL) || (output_var->mode != ir_var_out))
1446         continue;
1447      int paramIndex = _mesa_get_parameter(prog->Varying, output_var->name);
1448      if (paramIndex < 0)
1449         paramIndex = _mesa_add_parameter(prog->Varying, output_var->name);
1450      gl_program_parameter * param = prog->Varying->Parameters + paramIndex;
1451      if (output_var->location != -1)
1452      {
1453         param->BindLocation = output_var->location;
1454         continue;
1455      }
1456
1457      ir_variable *const input_var =
1458	 consumer->symbols->get_variable(output_var->name);
1459
1460      if ((input_var == NULL) || (input_var->mode != ir_var_in))
1461	 continue;
1462
1463      assert(input_var->location == -1);
1464
1465      param->BindLocation = output_var->location = output_index;
1466      param->Location = input_var->location = input_index;
1467
1468      /* FINISHME: Support for "varying" records in GLSL 1.50. */
1469      assert(!output_var->type->is_record());
1470
1471      if (output_var->type->is_array()) {
1472	 const unsigned slots = output_var->type->length
1473	    * output_var->type->fields.array->matrix_columns;
1474
1475	 output_index += slots;
1476	 input_index += slots;
1477    prog->VaryingSlots += slots;
1478      } else {
1479	 const unsigned slots = output_var->type->matrix_columns;
1480
1481	 output_index += slots;
1482	 input_index += slots;
1483    prog->VaryingSlots += slots;
1484      }
1485   }
1486
1487   foreach_list(node, consumer->ir) {
1488      ir_variable *const var = ((ir_instruction *) node)->as_variable();
1489
1490      if ((var == NULL) || (var->mode != ir_var_in))
1491	 continue;
1492      int paramIndex = _mesa_get_parameter(prog->Varying, var->name);
1493      if (paramIndex < 0)
1494         paramIndex = _mesa_add_parameter(prog->Varying, var->name);
1495      gl_program_parameter * param = prog->Varying->Parameters + paramIndex;
1496
1497      if (var->location == -1) {
1498         if (prog->Version <= 120) {
1499	    /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
1500	     *
1501	     *     Only those varying variables used (i.e. read) in
1502	     *     the fragment shader executable must be written to
1503	     *     by the vertex shader executable; declaring
1504	     *     superfluous varying variables in a vertex shader is
1505	     *     permissible.
1506	     *
1507	     * We interpret this text as meaning that the VS must
1508	     * write the variable for the FS to read it.  See
1509	     * "glsl1-varying read but not written" in piglit.
1510	     */
1511
1512            linker_error_printf(prog, "fragment shader varying %s not written "
1513               "by vertex shader\n.", var->name);
1514            prog->LinkStatus = false;
1515         }
1516
1517	 /* An 'in' variable is only really a shader input if its
1518	  * value is written by the previous stage.
1519	  */
1520         var->mode = ir_var_auto;
1521      }
1522      else
1523         param->Location = var->location;
1524   }
1525}
1526
1527
/**
 * Top-level GLSL link entry point.
 *
 * Groups the attached shaders by stage, links each stage into a single
 * gl_shader, cross-validates the interfaces between stages, then assigns
 * uniform, attribute, and varying locations and allocates the aligned
 * arena holding vertex input/output and uniform values.  On failure,
 * prog->LinkStatus stays false and errors accumulate in prog->InfoLog.
 */
void
link_shaders(const struct gl_context *ctx, struct gl_shader_program *prog)
{
   //void *mem_ctx = hieralloc_init("temporary linker context");
   void * mem_ctx = prog; // need linked & cloned ir to persist

   prog->LinkStatus = false;
   prog->Validated = false;
   prog->_Used = false;

   /* Start each link with a fresh, empty info log. */
   if (prog->InfoLog != NULL)
      hieralloc_free(prog->InfoLog);

   prog->InfoLog = hieralloc_strdup(prog, "");

   /* Separate the shaders into groups based on their type.
    */
   struct gl_shader **vert_shader_list;
   unsigned num_vert_shaders = 0;
   struct gl_shader **frag_shader_list;
   unsigned num_frag_shaders = 0;

   /* One allocation holds both lists; frag_shader_list aliases the
    * second half, so only vert_shader_list is freed at "done".
    */
   vert_shader_list = (struct gl_shader **)
      calloc(2 * prog->NumShaders, sizeof(struct gl_shader *));
   frag_shader_list =  &vert_shader_list[prog->NumShaders];

   unsigned min_version = UINT_MAX;
   unsigned max_version = 0;
   for (unsigned i = 0; i < prog->NumShaders; i++) {
      min_version = MIN2(min_version, prog->Shaders[i]->Version);
      max_version = MAX2(max_version, prog->Shaders[i]->Version);

      switch (prog->Shaders[i]->Type) {
      case GL_VERTEX_SHADER:
	 vert_shader_list[num_vert_shaders] = prog->Shaders[i];
	 num_vert_shaders++;
	 break;
      case GL_FRAGMENT_SHADER:
	 frag_shader_list[num_frag_shaders] = prog->Shaders[i];
	 num_frag_shaders++;
	 break;
      case GL_GEOMETRY_SHADER:
	 /* FINISHME: Support geometry shaders. */
	 assert(prog->Shaders[i]->Type != GL_GEOMETRY_SHADER);
	 break;
      }
   }

   /* Previous to GLSL version 1.30, different compilation units could mix and
    * match shading language versions.  With GLSL 1.30 and later, the versions
    * of all shaders must match.
    */
   assert(min_version >= 100);
   assert(max_version <= 130);
   if ((max_version >= 130 || min_version == 100)
       && min_version != max_version) {
      linker_error_printf(prog, "all shaders must use same shading "
			  "language version\n");
      goto done;
   }

   prog->Version = max_version;

   /* Discard any per-stage results from a previous link of this program. */
   for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] != NULL)
         _mesa_delete_shader(ctx, prog->_LinkedShaders[i]);

      prog->_LinkedShaders[i] = NULL;
   }

   /* Link all shaders for a particular stage and validate the result.
    */
   if (num_vert_shaders > 0) {
      gl_shader *const sh =
	 link_intrastage_shaders(mem_ctx, ctx, prog, vert_shader_list,
				 num_vert_shaders);

      if (sh == NULL)
	 goto done;

      if (!validate_vertex_shader_executable(prog, sh))
	 goto done;

      _mesa_reference_shader(ctx, &prog->_LinkedShaders[MESA_SHADER_VERTEX],
			     sh);
   }

   if (num_frag_shaders > 0) {
      gl_shader *const sh =
	 link_intrastage_shaders(mem_ctx, ctx, prog, frag_shader_list,
				 num_frag_shaders);

      if (sh == NULL)
	 goto done;

      if (!validate_fragment_shader_executable(prog, sh))
	 goto done;

      _mesa_reference_shader(ctx, &prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
			     sh);
   }

   /* Here begins the inter-stage linking phase.  Some initial validation is
    * performed, then locations are assigned for uniforms, attributes, and
    * varyings.
    */
   if (cross_validate_uniforms(prog)) {
      unsigned prev;

      /* Find the first linked stage; each later stage's inputs are
       * checked against the outputs of the stage before it.
       */
      for (prev = 0; prev < MESA_SHADER_TYPES; prev++) {
	 if (prog->_LinkedShaders[prev] != NULL)
	    break;
      }

      /* Validate the inputs of each stage with the output of the preceeding
       * stage.
       */
      for (unsigned i = prev + 1; i < MESA_SHADER_TYPES; i++) {
	 if (prog->_LinkedShaders[i] == NULL)
	    continue;

	 if (!cross_validate_outputs_to_inputs(prog,
					       prog->_LinkedShaders[prev],
					       prog->_LinkedShaders[i]))
	    goto done;

	 prev = i;
      }

      prog->LinkStatus = true;
   }

   /* Do common optimization before assigning storage for attributes,
    * uniforms, and varyings.  Later optimization could possibly make
    * some of that unused.
    */
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
	 continue;

      while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, 32))
	 ;
   }

   update_array_sizes(prog);

   assign_uniform_locations(prog);

   if (prog->_LinkedShaders[MESA_SHADER_VERTEX] != NULL) {
      /* FINISHME: The value of the max_attribute_index parameter is
       * FINISHME: implementation dependent based on the value of
       * FINISHME: GL_MAX_VERTEX_ATTRIBS.  GL_MAX_VERTEX_ATTRIBS must be
       * FINISHME: at least 16, so hardcode 16 for now.
       */
      if (!assign_attribute_locations(prog, 16)) {
	 prog->LinkStatus = false;
	 goto done;
      }
      /* AttributeSlots = one past the highest slot any attribute occupies. */
      prog->AttributeSlots = 0;
      for (unsigned i = 0; i < prog->Attributes->NumParameters; i++)
      {
         const gl_program_parameter & param = prog->Attributes->Parameters[i];
         if (param.Location + param.Slots > prog->AttributeSlots)
            prog->AttributeSlots = param.Location + param.Slots;
      }
   }

   /* Pair each consecutive stage for varying assignment, starting at the
    * first linked stage.
    */
   unsigned prev;
   for (prev = 0; prev < MESA_SHADER_TYPES; prev++) {
      if (prog->_LinkedShaders[prev] != NULL)
	 break;
   }

   for (unsigned i = prev + 1; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
	 continue;

      assign_varying_locations(prog,
			       prog->_LinkedShaders[prev],
			       prog->_LinkedShaders[i]);
      prev = i;
   }

   if (prog->_LinkedShaders[MESA_SHADER_VERTEX] != NULL) {
      demote_shader_inputs_and_outputs(prog->_LinkedShaders[MESA_SHADER_VERTEX],
				       ir_var_out);
   }

   if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) {
      gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];

      demote_shader_inputs_and_outputs(sh, ir_var_in);
      demote_shader_inputs_and_outputs(sh, ir_var_inout);
      demote_shader_inputs_and_outputs(sh, ir_var_out);
   }

   if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] != NULL) {
      gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];

      demote_shader_inputs_and_outputs(sh, ir_var_in);

      /* Fragment shader outputs must be gl_FragColor/gl_FragData; both
       * map to the fragColor slot of VertexOutput.
       */
      foreach_list(node, sh->ir) {
         ir_variable *const var = ((ir_instruction *) node)->as_variable();
         if (!var || ir_var_out != var->mode)
            continue;
         if (!strcmp("gl_FragColor", var->name) || !strcmp("gl_FragData", var->name))
         {
            int paramIndex = _mesa_get_parameter(prog->Varying, var->name);
            if (0 > paramIndex)
               paramIndex = _mesa_add_parameter(prog->Varying, var->name);
            var->location= offsetof(VertexOutput,fragColor)/sizeof(Vector4);
            prog->Varying->Parameters[paramIndex].Location = var->location;
         }
         else
            assert(0);
      }
   }

   /* Allocate one 16-byte-aligned arena holding, in order, the
    * VertexInput values, the VertexOutput values, and the uniform values
    * (general slots followed by sampler slots).
    */
   //prog->InputOuputBase = malloc(1024 * 8);
   //memset(prog->InputOuputBase, 0xdd, 1024 * 8);
   prog->InputOuputBase = hieralloc_realloc(prog, prog->InputOuputBase, char,
      (prog->Uniforms->Slots + prog->Uniforms->SamplerSlots) * sizeof(float) * 4 + sizeof(VertexInput) + sizeof(VertexOutput) + 16);
   prog->ValuesVertexInput = (float (*)[4])((((unsigned long)prog->InputOuputBase) + 15L) & (~15L));
   prog->ValuesVertexOutput = (float (*)[4])((unsigned long)prog->ValuesVertexInput + sizeof(VertexInput));
   prog->ValuesUniform = (float (*)[4])((unsigned long)prog->ValuesVertexOutput + sizeof(VertexOutput));

   // initialize uniforms to zero after link
   memset(prog->ValuesUniform, 0, sizeof(float) * 4 * (prog->Uniforms->Slots + prog->Uniforms->SamplerSlots));

done:
   free(vert_shader_list);

   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
	 continue;

      /* Retain any live IR, but trash the rest. */
      reparent_ir(prog->_LinkedShaders[i]->ir, prog->_LinkedShaders[i]->ir);
   }

   //hieralloc_free(mem_ctx);
}
1770