linker.cpp revision 59c45e9e6cf80be149c6e5d94763e98312f49be2
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file linker.cpp
 * GLSL linker implementation
 *
 * Given a set of shaders that are to be linked to generate a final program,
 * there are three distinct stages.
 *
 * In the first stage shaders are partitioned into groups based on the shader
 * type.  All shaders of a particular type (e.g., vertex shaders) are linked
 * together.
 *
 *   - Undefined references in each shader are resolved to definitions in
 *     another shader.
 *   - Types and qualifiers of uniforms, outputs, and global variables defined
 *     in multiple shaders with the same name are verified to be the same.
 *   - Initializers for uniforms and global variables defined
 *     in multiple shaders with the same name are verified to be the same.
 *
 * The result, in the terminology of the GLSL spec, is a set of shader
 * executables for each processing unit.
 *
 * After the first stage is complete, a series of semantic checks is performed
 * on each of the shader executables.
 *
 *   - Each shader executable must define a \c main function.
 *   - Each vertex shader executable must write to \c gl_Position.
 *   - Each fragment shader executable must write to either \c gl_FragData or
 *     \c gl_FragColor.
 *
 * In the final stage individual shader executables are linked to create a
 * complete executable.
 *
 *   - Types of uniforms defined in multiple shader stages with the same name
 *     are verified to be the same.
 *   - Initializers for uniforms defined in multiple shader stages with the
 *     same name are verified to be the same.
 *   - Types and qualifiers of outputs defined in one stage are verified to
 *     be the same as the types and qualifiers of inputs defined with the same
 *     name in a later stage.
 *
 * \author Ian Romanick <ian.d.romanick@intel.com>
 */
#include <cstdlib>
#include <cstdio>
#include <cstdarg>
#include <cstring>
#include <climits>

extern "C" {
#include <talloc.h>
}

#include "main/mtypes.h"
#include "main/macros.h"
#include "main/shaderobj.h"
#include "glsl_symbol_table.h"
#include "ir.h"
#include "program.h"
#include "hash_table.h"
#include "linker.h"
#include "ir_optimization.h"

/**
 * Visitor that determines whether or not a variable is ever written.
 */
class find_assignment_visitor : public ir_hierarchical_visitor {
public:
   find_assignment_visitor(const char *name)
      : name(name), found(false)
   {
      /* empty */
   }

   virtual ir_visitor_status visit_enter(ir_assignment *ir)
   {
      ir_variable *const var = ir->lhs->variable_referenced();

      if (strcmp(name, var->name) == 0) {
         found = true;
         return visit_stop;
      }

      return visit_continue_with_parent;
   }

   bool variable_found()
   {
      return found;
   }

private:
   const char *name;       /**< Find writes to a variable with this name. */
   bool found;             /**< Was a write to the variable found? */
};


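/**
 * Append an error message to a program's info log
 *
 * The printf-style message is prefixed with "error: " and appended to
 * \c prog->InfoLog.
 */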
void
linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
{
   va_list ap;

   prog->InfoLog = talloc_strdup_append(prog->InfoLog, "error: ");
   va_start(ap, fmt);
   prog->InfoLog = talloc_vasprintf_append(prog->InfoLog, fmt, ap);
   va_end(ap);
}


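/**
 * Invalidate the locations of generic variables of a given mode
 *
 * Walks the shader's IR and resets \c ir_variable::location to -1 for every
 * variable of mode \c mode whose location is at or above \c generic_base;
 * variables with fixed (built-in) locations below \c generic_base are left
 * untouched.
 */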
void
invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode,
                              int generic_base)
{
   foreach_list(node, sh->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();

      if ((var == NULL) || (var->mode != (unsigned) mode))
         continue;

      /* Only assign locations for generic attributes / varyings / etc.
       */
      if (var->location >= generic_base)
         var->location = -1;
   }
}


/**
 * Determine the number of attribute slots required for a particular type
 *
 * This code is here because it implements the language rules of a specific
 * GLSL version.  Since it's a property of the language and not a property of
 * types in general, it doesn't really belong in glsl_type.
 */
unsigned
count_attribute_slots(const glsl_type *t)
{
   /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
    *
    *     "A scalar input counts the same amount against this limit as a vec4,
    *     so applications may want to consider packing groups of four
    *     unrelated float inputs together into a vector to better utilize the
    *     capabilities of the underlying hardware.  A matrix input will use up
    *     multiple locations.  The number of locations used will equal the
    *     number of columns in the matrix."
    *
    * The spec does not explicitly say how arrays are counted.  However, it
    * should be safe to assume the total number of slots consumed by an array
    * is the number of entries in the array multiplied by the number of slots
    * consumed by a single element of the array.
    */

   if (t->is_array())
      return t->array_size() * count_attribute_slots(t->element_type());

   if (t->is_matrix())
      return t->matrix_columns;

   return 1;
}


/**
 * Verify that a vertex shader executable meets all semantic requirements
 *
 * \param shader  Vertex shader executable to be verified
 */
bool
validate_vertex_shader_executable(struct gl_shader_program *prog,
                                  struct gl_shader *shader)
{
   if (shader == NULL)
      return true;

   find_assignment_visitor find("gl_Position");
   find.run(shader->ir);
   if (!find.variable_found()) {
      linker_error_printf(prog,
                          "vertex shader does not write to `gl_Position'\n");
      return false;
   }

   return true;
}


/**
 * Verify that a fragment shader executable meets all semantic requirements
 *
 * \param shader  Fragment shader executable to be verified
 */
bool
validate_fragment_shader_executable(struct gl_shader_program *prog,
                                    struct gl_shader *shader)
{
   if (shader == NULL)
      return true;

   find_assignment_visitor frag_color("gl_FragColor");
   find_assignment_visitor frag_data("gl_FragData");

   frag_color.run(shader->ir);
   frag_data.run(shader->ir);

   if (frag_color.variable_found() && frag_data.variable_found()) {
      linker_error_printf(prog, "fragment shader writes to both "
                          "`gl_FragColor' and `gl_FragData'\n");
      return false;
   }

   return true;
}


/**
 * Generate a string describing the mode of a variable
 */
static const char *
mode_string(const ir_variable *var)
{
   switch (var->mode) {
   case ir_var_auto:
      return (var->read_only) ? "global constant" : "global variable";

   case ir_var_uniform: return "uniform";
   case ir_var_in:      return "shader input";
   case ir_var_out:     return "shader output";
   case ir_var_inout:   return "shader inout";

   case ir_var_temporary:
   default:
      assert(!"Should not get here.");
      return "invalid variable";
   }
}


/**
 * Perform validation of global variables used across multiple shaders
 */
bool
cross_validate_globals(struct gl_shader_program *prog,
                       struct gl_shader **shader_list,
                       unsigned num_shaders,
                       bool uniforms_only)
{
   /* Examine all of the uniforms in all of the shaders and cross validate
    * them.
    */
   glsl_symbol_table variables;
   for (unsigned i = 0; i < num_shaders; i++) {
      foreach_list(node, shader_list[i]->ir) {
         ir_variable *const var = ((ir_instruction *) node)->as_variable();

         if (var == NULL)
            continue;

         if (uniforms_only && (var->mode != ir_var_uniform))
            continue;

         /* Don't cross validate temporaries that are at global scope.  These
          * will eventually get pulled into the shader's `main'.
          */
         if (var->mode == ir_var_temporary)
            continue;

         /* If a global with this name has already been seen, verify that the
          * new instance has the same type.  In addition, if the globals have
          * initializers, the values of the initializers must be the same.
          */
         ir_variable *const existing = variables.get_variable(var->name);
         if (existing != NULL) {
            if (var->type != existing->type) {
               linker_error_printf(prog, "%s `%s' declared as type "
                                   "`%s' and type `%s'\n",
                                   mode_string(var),
                                   var->name, var->type->name,
                                   existing->type->name);
               return false;
            }

            /* FINISHME: Handle non-constant initializers.
             */
            if (var->constant_value != NULL) {
               if (existing->constant_value != NULL) {
                  if (!var->constant_value->has_value(existing->constant_value)) {
                     linker_error_printf(prog, "initializers for %s "
                                         "`%s' have differing values\n",
                                         mode_string(var), var->name);
                     return false;
                  }
               } else
                  /* If the first-seen instance of a particular uniform did not
                   * have an initializer but a later instance does, copy the
                   * initializer to the version stored in the symbol table.
                   */
                  /* FINISHME: This is wrong.  The constant_value field should
                   * FINISHME: not be modified!  Imagine a case where a shader
                   * FINISHME: without an initializer is linked in two different
                   * FINISHME: programs with shaders that have differing
                   * FINISHME: initializers.  Linking with the first will
                   * FINISHME: modify the shader, and linking with the second
                   * FINISHME: will fail.
                   */
                  existing->constant_value = var->constant_value->clone(NULL);
            }
         } else
            variables.add_variable(var->name, var);
      }
   }

   return true;
}


/**
 * Perform validation of uniforms used across multiple shader stages
 */
bool
cross_validate_uniforms(struct gl_shader_program *prog)
{
   return cross_validate_globals(prog, prog->_LinkedShaders,
                                 prog->_NumLinkedShaders, true);
}


/**
 * Validate that outputs from one stage match inputs of another
 */
bool
cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
                                 gl_shader *producer, gl_shader *consumer)
{
   glsl_symbol_table parameters;
   /* FINISHME: Figure these out dynamically. */
   const char *const producer_stage = "vertex";
   const char *const consumer_stage = "fragment";

   /* Find all shader outputs in the "producer" stage.
    */
   foreach_list(node, producer->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();

      /* FINISHME: For geometry shaders, this should also look for inout
       * FINISHME: variables.
       */
      if ((var == NULL) || (var->mode != ir_var_out))
         continue;

      parameters.add_variable(var->name, var);
   }


   /* Find all shader inputs in the "consumer" stage.  Any variables that have
    * matching outputs already in the symbol table must have the same type and
    * qualifiers.
    */
   foreach_list(node, consumer->ir) {
      ir_variable *const input = ((ir_instruction *) node)->as_variable();

      /* FINISHME: For geometry shaders, this should also look for inout
       * FINISHME: variables.
       */
      if ((input == NULL) || (input->mode != ir_var_in))
         continue;

      ir_variable *const output = parameters.get_variable(input->name);
      if (output != NULL) {
         /* Check that the types match between stages.
          */
         if (input->type != output->type) {
            linker_error_printf(prog,
                                "%s shader output `%s' declared as "
                                "type `%s', but %s shader input declared "
                                "as type `%s'\n",
                                producer_stage, output->name,
                                output->type->name,
                                consumer_stage, input->type->name);
            return false;
         }

         /* Check that all of the qualifiers match between stages.
          */
         if (input->centroid != output->centroid) {
            linker_error_printf(prog,
                                "%s shader output `%s' %s centroid qualifier, "
                                "but %s shader input %s centroid qualifier\n",
                                producer_stage,
                                output->name,
                                (output->centroid) ? "has" : "lacks",
                                consumer_stage,
                                (input->centroid) ? "has" : "lacks");
"has" : "lacks"); 414 return false; 415 } 416 417 if (input->invariant != output->invariant) { 418 linker_error_printf(prog, 419 "%s shader output `%s' %s invariant qualifier, " 420 "but %s shader input %s invariant qualifier\n", 421 producer_stage, 422 output->name, 423 (output->invariant) ? "has" : "lacks", 424 consumer_stage, 425 (input->invariant) ? "has" : "lacks"); 426 return false; 427 } 428 429 if (input->interpolation != output->interpolation) { 430 linker_error_printf(prog, 431 "%s shader output `%s' specifies %s " 432 "interpolation qualifier, " 433 "but %s shader input specifies %s " 434 "interpolation qualifier\n", 435 producer_stage, 436 output->name, 437 output->interpolation_string(), 438 consumer_stage, 439 input->interpolation_string()); 440 return false; 441 } 442 } 443 } 444 445 return true; 446} 447 448 449/** 450 * Populates a shaders symbol table with all global declarations 451 */ 452static void 453populate_symbol_table(gl_shader *sh) 454{ 455 sh->symbols = new(sh) glsl_symbol_table; 456 457 foreach_list(node, sh->ir) { 458 ir_instruction *const inst = (ir_instruction *) node; 459 ir_variable *var; 460 ir_function *func; 461 462 if ((func = inst->as_function()) != NULL) { 463 sh->symbols->add_function(func->name, func); 464 } else if ((var = inst->as_variable()) != NULL) { 465 sh->symbols->add_variable(var->name, var); 466 } 467 } 468} 469 470 471/** 472 * Remap variables referenced in an instruction tree 473 * 474 * This is used when instruction trees are cloned from one shader and placed in 475 * another. These trees will contain references to \c ir_variable nodes that 476 * do not exist in the target shader. This function finds these \c ir_variable 477 * references and replaces the references with matching variables in the target 478 * shader. 479 * 480 * If there is no matching variable in the target shader, a clone of the 481 * \c ir_variable is made and added to the target shader. The new variable is 482 * added to \b both the instruction stream and the symbol table. 483 * 484 * \param inst IR tree that is to be processed. 485 * \param symbols Symbol table containing global scope symbols in the 486 * linked shader. 487 * \param instructions Instruction stream where new variable declarations 488 * should be added. 
 */
void
remap_variables(ir_instruction *inst, glsl_symbol_table *symbols,
                exec_list *instructions, hash_table *temps)
{
   class remap_visitor : public ir_hierarchical_visitor {
   public:
      remap_visitor(glsl_symbol_table *symbols, exec_list *instructions,
                    hash_table *temps)
      {
         this->symbols = symbols;
         this->instructions = instructions;
         this->temps = temps;
      }

      virtual ir_visitor_status visit(ir_dereference_variable *ir)
      {
         if (ir->var->mode == ir_var_temporary) {
            ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var);

            assert(var != NULL);
            ir->var = var;
            return visit_continue;
         }

         ir_variable *const existing =
            this->symbols->get_variable(ir->var->name);
         if (existing != NULL)
            ir->var = existing;
         else {
            ir_variable *copy = ir->var->clone(NULL);

            this->symbols->add_variable(copy->name, copy);
            this->instructions->push_head(copy);
            ir->var = copy;
         }

         return visit_continue;
      }

   private:
      glsl_symbol_table *symbols;
      exec_list *instructions;
      hash_table *temps;
   };

   remap_visitor v(symbols, instructions, temps);

   inst->accept(&v);
}


/**
 * Move non-declarations from one instruction stream to another
 *
 * The intended usage pattern of this function is to pass the pointer to the
 * head sentinel of a list (i.e., a pointer to the list cast to an \c exec_node
 * pointer) for \c last and \c false for \c make_copies on the first
 * call.  Successive calls pass the return value of the previous call for
 * \c last and \c true for \c make_copies.
 *
 * \param instructions Source instruction stream
 * \param last         Instruction after which new instructions should be
 *                     inserted in the target instruction stream
 * \param make_copies  Flag selecting whether instructions in \c instructions
 *                     should be copied (via \c ir_instruction::clone) into the
 *                     target list or moved.
 *
 * \return
 * The new "last" instruction in the target instruction stream.  This pointer
 * is suitable for use as the \c last parameter of a later call to this
 * function.
 */
exec_node *
move_non_declarations(exec_list *instructions, exec_node *last,
                      bool make_copies, gl_shader *target)
{
   hash_table *temps = NULL;

   if (make_copies)
      temps = hash_table_ctor(0, hash_table_pointer_hash,
                              hash_table_pointer_compare);

   foreach_list_safe(node, instructions) {
      ir_instruction *inst = (ir_instruction *) node;

      if (inst->as_function())
         continue;

      ir_variable *var = inst->as_variable();
      if ((var != NULL) && (var->mode != ir_var_temporary))
         continue;

      assert(inst->as_assignment()
             || ((var != NULL) && (var->mode == ir_var_temporary)));

      if (make_copies) {
         inst = inst->clone(NULL);

         if (var != NULL)
            hash_table_insert(temps, inst, var);
         else
            remap_variables(inst, target->symbols, target->ir, temps);
      } else {
         inst->remove();
      }

      last->insert_after(inst);
      last = inst;
   }

   if (make_copies)
      hash_table_dtor(temps);

   return last;
}

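/* Illustrative sketch of the usage pattern described above; the names here
 * are placeholders, and link_intrastage_shaders() below is the real caller:
 *
 *    exec_node *last = (exec_node *) &main_sig->body;
 *    last = move_non_declarations(main_shader_ir, last, false, linked);
 *    last = move_non_declarations(other_shader_ir, last, true, linked);
 */
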
/**
 * Get the function signature for main from a shader
 */
static ir_function_signature *
get_main_function_signature(gl_shader *sh)
{
   ir_function *const f = sh->symbols->get_function("main");
   if (f != NULL) {
      exec_list void_parameters;

      /* Look for the 'void main()' signature and ensure that it's defined.
       * This keeps the linker from accidentally picking a shader that just
       * contains a prototype for main.
       *
       * We don't have to check for multiple definitions of main (in multiple
       * shaders) because that would have already been caught above.
       */
      ir_function_signature *sig = f->matching_signature(&void_parameters);
      if ((sig != NULL) && sig->is_defined) {
         return sig;
      }
   }

   return NULL;
}


/**
 * Combine a group of shaders for a single stage to generate a linked shader
 *
 * \note
 * If this function is supplied a single shader, it is cloned, and the new
 * shader is returned.
 */
static struct gl_shader *
link_intrastage_shaders(struct gl_shader_program *prog,
                        struct gl_shader **shader_list,
                        unsigned num_shaders)
{
   /* Check that global variables defined in multiple shaders are consistent.
    */
   if (!cross_validate_globals(prog, shader_list, num_shaders, false))
      return NULL;

   /* Check that there is only a single definition of each function signature
    * across all shaders.
    */
   for (unsigned i = 0; i < (num_shaders - 1); i++) {
      foreach_list(node, shader_list[i]->ir) {
         ir_function *const f = ((ir_instruction *) node)->as_function();

         if (f == NULL)
            continue;

         for (unsigned j = i + 1; j < num_shaders; j++) {
            ir_function *const other =
               shader_list[j]->symbols->get_function(f->name);

            /* If the other shader has no function (and therefore no function
             * signatures) with the same name, skip to the next shader.
             */
            if (other == NULL)
               continue;

            foreach_iter (exec_list_iterator, iter, *f) {
               ir_function_signature *sig =
                  (ir_function_signature *) iter.get();

               if (!sig->is_defined || sig->is_built_in)
                  continue;

               ir_function_signature *other_sig =
                  other->exact_matching_signature(& sig->parameters);

               if ((other_sig != NULL) && other_sig->is_defined
                   && !other_sig->is_built_in) {
                  linker_error_printf(prog,
                                      "function `%s' is multiply defined",
                                      f->name);
                  return NULL;
               }
            }
         }
      }
   }

   /* Find the shader that defines main, and make a clone of it.
    *
    * Starting with the clone, search for undefined references.  If one is
    * found, find the shader that defines it.  Clone the reference and add
    * it to the shader.  Repeat until there are no undefined references or
    * until a reference cannot be resolved.
    */
   gl_shader *main = NULL;
   for (unsigned i = 0; i < num_shaders; i++) {
      if (get_main_function_signature(shader_list[i]) != NULL) {
         main = shader_list[i];
         break;
      }
   }

   if (main == NULL) {
      linker_error_printf(prog, "%s shader lacks `main'\n",
                          (shader_list[0]->Type == GL_VERTEX_SHADER)
                          ? "vertex" : "fragment");
      return NULL;
   }

   gl_shader *const linked = _mesa_new_shader(NULL, 0, main->Type);
   linked->ir = new(linked) exec_list;
   clone_ir_list(linked->ir, main->ir);

   populate_symbol_table(linked);

   /* Get a pointer to the main function in the final linked shader (i.e., the
    * copy of the original shader that contained the main function).
    */
   ir_function_signature *const main_sig = get_main_function_signature(linked);

   /* Move any instructions other than variable declarations or function
    * declarations into main.
    */
   exec_node *insertion_point =
      move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false,
                            linked);

   for (unsigned i = 0; i < num_shaders; i++) {
      if (shader_list[i] == main)
         continue;

      insertion_point = move_non_declarations(shader_list[i]->ir,
                                              insertion_point, true, linked);
   }

   /* Resolve initializers for global variables in the linked shader.
    */
   unsigned num_linking_shaders = num_shaders;
   for (unsigned i = 0; i < num_shaders; i++)
      num_linking_shaders += shader_list[i]->num_builtins_to_link;

   gl_shader **linking_shaders =
      (gl_shader **) calloc(num_linking_shaders, sizeof(gl_shader *));

   memcpy(linking_shaders, shader_list,
          sizeof(linking_shaders[0]) * num_shaders);

   unsigned idx = num_shaders;
   for (unsigned i = 0; i < num_shaders; i++) {
      memcpy(&linking_shaders[idx], shader_list[i]->builtins_to_link,
             sizeof(linking_shaders[0]) * shader_list[i]->num_builtins_to_link);
      idx += shader_list[i]->num_builtins_to_link;
   }

   assert(idx == num_linking_shaders);

   link_function_calls(prog, linked, linking_shaders, num_linking_shaders);

   free(linking_shaders);

   return linked;
}


struct uniform_node {
   exec_node link;
   struct gl_uniform *u;
   unsigned slots;
};

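/**
 * Assign locations to all uniforms in all linked shaders
 *
 * Uniforms with the same name in different stages share a single set of
 * \c gl_uniform entries.  Each uniform is assigned one location per vec4 of
 * storage it consumes, and the resulting list is stored in \c prog->Uniforms.
 */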
void
assign_uniform_locations(struct gl_shader_program *prog)
{
   /* */
   exec_list uniforms;
   unsigned total_uniforms = 0;
   hash_table *ht = hash_table_ctor(32, hash_table_string_hash,
                                    hash_table_string_compare);

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      unsigned next_position = 0;

      foreach_list(node, prog->_LinkedShaders[i]->ir) {
         ir_variable *const var = ((ir_instruction *) node)->as_variable();

         if ((var == NULL) || (var->mode != ir_var_uniform))
            continue;

         const unsigned vec4_slots = (var->component_slots() + 3) / 4;
         assert(vec4_slots != 0);

         uniform_node *n = (uniform_node *) hash_table_find(ht, var->name);
         if (n == NULL) {
            n = (uniform_node *) calloc(1, sizeof(struct uniform_node));
            n->u = (gl_uniform *) calloc(vec4_slots, sizeof(struct gl_uniform));
            n->slots = vec4_slots;

            n->u[0].Name = strdup(var->name);
            for (unsigned j = 1; j < vec4_slots; j++)
               n->u[j].Name = n->u[0].Name;

            hash_table_insert(ht, n, n->u[0].Name);
            uniforms.push_tail(& n->link);
            total_uniforms += vec4_slots;
         }

         if (var->constant_value != NULL)
            for (unsigned j = 0; j < vec4_slots; j++)
               n->u[j].Initialized = true;

         var->location = next_position;

         for (unsigned j = 0; j < vec4_slots; j++) {
            switch (prog->_LinkedShaders[i]->Type) {
            case GL_VERTEX_SHADER:
               n->u[j].VertPos = next_position;
               break;
            case GL_FRAGMENT_SHADER:
               n->u[j].FragPos = next_position;
               break;
            case GL_GEOMETRY_SHADER:
               /* FINISHME: Support geometry shaders. */
               assert(prog->_LinkedShaders[i]->Type != GL_GEOMETRY_SHADER);
               break;
            }

            next_position++;
         }
      }
   }

   gl_uniform_list *ul = (gl_uniform_list *)
      calloc(1, sizeof(gl_uniform_list));

   ul->Size = total_uniforms;
   ul->NumUniforms = total_uniforms;
   ul->Uniforms = (gl_uniform *) calloc(total_uniforms, sizeof(gl_uniform));

   unsigned idx = 0;
   uniform_node *next;
   for (uniform_node *node = (uniform_node *) uniforms.head
           ; node->link.next != NULL
           ; node = next) {
      next = (uniform_node *) node->link.next;

      node->link.remove();
      memcpy(&ul->Uniforms[idx], node->u, sizeof(gl_uniform) * node->slots);
      idx += node->slots;

      free(node->u);
      free(node);
   }

   hash_table_dtor(ht);

   prog->Uniforms = ul;
}


/**
 * Find a contiguous set of available bits in a bitmask
 *
 * \param used_mask     Bits representing used (1) and unused (0) locations
 * \param needed_count  Number of contiguous bits needed.
 *
 * \return
 * Base location of the available bits on success or -1 on failure.
 */
int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   unsigned needed_mask = (1 << needed_count) - 1;
   const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;

   /* The comparison to 32 is redundant, but without it GCC emits "warning:
    * cannot optimize possibly infinite loops" for the loop below.
    */
   if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
      return -1;

   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
         return i;

      needed_mask <<= 1;
   }

   return -1;
}

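/**
 * Assign locations to vertex shader inputs (generic attributes)
 *
 * Attribute bindings supplied by the application are honored first; the
 * remaining attributes are then packed into the unused locations, largest
 * first.  A link error is generated if a binding overlaps a previously
 * allocated location or if an attribute cannot be given enough contiguous
 * locations.
 *
 * \param max_attribute_index  Number of generic vertex attribute locations
 *                             available (i.e., GL_MAX_VERTEX_ATTRIBS).
 */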
bool
assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index)
{
   /* Mark invalid attribute locations as being used.
    */
   unsigned used_locations = (max_attribute_index >= 32)
      ? ~0 : ~((1 << max_attribute_index) - 1);

   gl_shader *const sh = prog->_LinkedShaders[0];
   assert(sh->Type == GL_VERTEX_SHADER);

   /* Operate in a total of four passes.
    *
    * 1. Invalidate the location assignments for all vertex shader inputs.
    *
    * 2. Assign locations for inputs that have user-defined (via
    *    glBindAttribLocation) locations.
    *
    * 3. Sort the attributes without assigned locations by number of slots
    *    required in decreasing order.  Fragmentation caused by attribute
    *    locations assigned by the application may prevent large attributes
    *    from having enough contiguous space.
    *
    * 4. Assign locations to any inputs without assigned locations.
    */

   invalidate_variable_locations(sh, ir_var_in, VERT_ATTRIB_GENERIC0);

   if (prog->Attributes != NULL) {
      for (unsigned i = 0; i < prog->Attributes->NumParameters; i++) {
         ir_variable *const var =
            sh->symbols->get_variable(prog->Attributes->Parameters[i].Name);

         /* Note: attributes that occupy multiple slots, such as arrays or
          * matrices, may appear in the attrib array multiple times.
          */
         if ((var == NULL) || (var->location != -1))
            continue;

         /* From page 61 of the OpenGL 4.0 spec:
          *
          *     "LinkProgram will fail if the attribute bindings assigned by
          *     BindAttribLocation do not leave enough space to assign a
          *     location for an active matrix attribute or an active attribute
          *     array, both of which require multiple contiguous generic
          *     attributes."
          *
          * Previous versions of the spec contain similar language but omit the
          * bit about attribute arrays.
          *
          * Page 61 of the OpenGL 4.0 spec also says:
          *
          *     "It is possible for an application to bind more than one
          *     attribute name to the same location. This is referred to as
          *     aliasing. This will only work if only one of the aliased
          *     attributes is active in the executable program, or if no path
          *     through the shader consumes more than one attribute of a set
          *     of attributes aliased to the same location. A link error can
          *     occur if the linker determines that every path through the
          *     shader consumes multiple aliased attributes, but
          *     implementations are not required to generate an error in this
          *     case."
          *
          * These two paragraphs are either somewhat contradictory, or I don't
          * fully understand one or both of them.
          */
         /* FINISHME: The code as currently written does not support attribute
          * FINISHME: location aliasing (see comment above).
          */
         const int attr = prog->Attributes->Parameters[i].StateIndexes[0];
         const unsigned slots = count_attribute_slots(var->type);

         /* Mask representing the contiguous slots that will be used by this
          * attribute.
          */
         const unsigned use_mask = (1 << slots) - 1;

         /* Generate a link error if the set of bits requested for this
          * attribute overlaps any previously allocated bits.
          */
         if ((~(use_mask << attr) & used_locations) != used_locations) {
            linker_error_printf(prog,
                                "insufficient contiguous attribute locations "
                                "available for vertex shader input `%s'",
                                var->name);
            return false;
         }

         var->location = VERT_ATTRIB_GENERIC0 + attr;
         used_locations |= (use_mask << attr);
      }
   }

   /* Temporary storage for the set of attributes that need locations assigned.
    */
   struct temp_attr {
      unsigned slots;
      ir_variable *var;

      /* Used below in the call to qsort. */
      static int compare(const void *a, const void *b)
      {
         const temp_attr *const l = (const temp_attr *) a;
         const temp_attr *const r = (const temp_attr *) b;

         /* Reversed because we want a descending order sort below. */
         return r->slots - l->slots;
      }
   } to_assign[16];

   unsigned num_attr = 0;

   foreach_list(node, sh->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();

      if ((var == NULL) || (var->mode != ir_var_in))
         continue;

      /* The location was explicitly assigned, nothing to do here.
       */
      if (var->location != -1)
         continue;

      to_assign[num_attr].slots = count_attribute_slots(var->type);
      to_assign[num_attr].var = var;
      num_attr++;
   }

   /* If all of the attributes were assigned locations by the application (or
    * are built-in attributes with fixed locations), return early.  This should
    * be the common case.
    */
   if (num_attr == 0)
      return true;

   qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);

   /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS.  It can only
    * be explicitly assigned via glBindAttribLocation.  Mark it as reserved to
    * prevent it from being automatically allocated below.
    */
   used_locations |= (1 << 0);

   for (unsigned i = 0; i < num_attr; i++) {
      /* Mask representing the contiguous slots that will be used by this
       * attribute.
       */
      const unsigned use_mask = (1 << to_assign[i].slots) - 1;

      int location = find_available_slots(used_locations, to_assign[i].slots);

      if (location < 0) {
         linker_error_printf(prog,
                             "insufficient contiguous attribute locations "
                             "available for vertex shader input `%s'",
                             to_assign[i].var->name);
         return false;
      }

      to_assign[i].var->location = VERT_ATTRIB_GENERIC0 + location;
      used_locations |= (use_mask << location);
   }

   return true;
}

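/**
 * Assign matching locations to varyings written by \c producer and read by
 * \c consumer
 *
 * Outputs that are never read and inputs that are never written are demoted
 * so that later optimization passes can eliminate them.
 */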
void
assign_varying_locations(gl_shader *producer, gl_shader *consumer)
{
   /* FINISHME: Set dynamically when geometry shader support is added. */
   unsigned output_index = VERT_RESULT_VAR0;
   unsigned input_index = FRAG_ATTRIB_VAR0;

   /* Operate in a total of three passes.
    *
    * 1. Assign locations for any matching inputs and outputs.
    *
    * 2. Mark output variables in the producer that do not have locations as
    *    not being outputs.  This lets the optimizer eliminate them.
    *
    * 3. Mark input variables in the consumer that do not have locations as
    *    not being inputs.  This lets the optimizer eliminate them.
    */

   invalidate_variable_locations(producer, ir_var_out, VERT_RESULT_VAR0);
   invalidate_variable_locations(consumer, ir_var_in, FRAG_ATTRIB_VAR0);

   foreach_list(node, producer->ir) {
      ir_variable *const output_var = ((ir_instruction *) node)->as_variable();

      if ((output_var == NULL) || (output_var->mode != ir_var_out)
          || (output_var->location != -1))
         continue;

      ir_variable *const input_var =
         consumer->symbols->get_variable(output_var->name);

      if ((input_var == NULL) || (input_var->mode != ir_var_in))
         continue;

      assert(input_var->location == -1);

      /* FINISHME: Location assignment will need some changes when arrays,
       * FINISHME: matrices, and structures are allowed as shader inputs /
       * FINISHME: outputs.
       */
      output_var->location = output_index;
      input_var->location = input_index;

      output_index++;
      input_index++;
   }

   foreach_list(node, producer->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();

      if ((var == NULL) || (var->mode != ir_var_out))
         continue;

      /* An 'out' variable is only really a shader output if its value is read
       * by the following stage.
       */
      if (var->location == -1) {
         var->shader_out = false;
         var->mode = ir_var_auto;
      }
   }

   foreach_list(node, consumer->ir) {
      ir_variable *const var = ((ir_instruction *) node)->as_variable();

      if ((var == NULL) || (var->mode != ir_var_in))
         continue;

      /* An 'in' variable is only really a shader input if its value is written
       * by the previous stage.
       */
      var->shader_in = (var->location != -1);
   }
}

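/**
 * Link a whole shader program
 *
 * Performs the intra-stage and inter-stage linking described at the top of
 * this file, runs per-shader lowering and optimization passes, and assigns
 * uniform, attribute, and varying locations.  On success \c prog->LinkStatus
 * is set to true; on failure an error is appended to the program's info log.
 */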
void
link_shaders(struct gl_shader_program *prog)
{
   prog->LinkStatus = false;
   prog->Validated = false;
   prog->_Used = false;

   if (prog->InfoLog != NULL)
      talloc_free(prog->InfoLog);

   prog->InfoLog = talloc_strdup(NULL, "");

   /* Separate the shaders into groups based on their type.
    */
   struct gl_shader **vert_shader_list;
   unsigned num_vert_shaders = 0;
   struct gl_shader **frag_shader_list;
   unsigned num_frag_shaders = 0;

   vert_shader_list = (struct gl_shader **)
      calloc(2 * prog->NumShaders, sizeof(struct gl_shader *));
   frag_shader_list = &vert_shader_list[prog->NumShaders];

   unsigned min_version = UINT_MAX;
   unsigned max_version = 0;
   for (unsigned i = 0; i < prog->NumShaders; i++) {
      min_version = MIN2(min_version, prog->Shaders[i]->Version);
      max_version = MAX2(max_version, prog->Shaders[i]->Version);

      switch (prog->Shaders[i]->Type) {
      case GL_VERTEX_SHADER:
         vert_shader_list[num_vert_shaders] = prog->Shaders[i];
         num_vert_shaders++;
         break;
      case GL_FRAGMENT_SHADER:
         frag_shader_list[num_frag_shaders] = prog->Shaders[i];
         num_frag_shaders++;
         break;
      case GL_GEOMETRY_SHADER:
         /* FINISHME: Support geometry shaders. */
         assert(prog->Shaders[i]->Type != GL_GEOMETRY_SHADER);
         break;
      }
   }

   /* Prior to GLSL version 1.30, different compilation units could mix and
    * match shading language versions.  With GLSL 1.30 and later, the versions
    * of all shaders must match.
    */
   assert(min_version >= 110);
   assert(max_version <= 130);
   if ((max_version >= 130) && (min_version != max_version)) {
      linker_error_printf(prog, "all shaders must use same shading "
                          "language version\n");
      goto done;
   }

   prog->Version = max_version;

   /* Link all shaders for a particular stage and validate the result.
    */
   prog->_NumLinkedShaders = 0;
   if (num_vert_shaders > 0) {
      gl_shader *const sh =
         link_intrastage_shaders(prog, vert_shader_list, num_vert_shaders);

      if (sh == NULL)
         goto done;

      if (!validate_vertex_shader_executable(prog, sh))
         goto done;

      prog->_LinkedShaders[prog->_NumLinkedShaders] = sh;
      prog->_NumLinkedShaders++;
   }

   if (num_frag_shaders > 0) {
      gl_shader *const sh =
         link_intrastage_shaders(prog, frag_shader_list, num_frag_shaders);

      if (sh == NULL)
         goto done;

      if (!validate_fragment_shader_executable(prog, sh))
         goto done;

      prog->_LinkedShaders[prog->_NumLinkedShaders] = sh;
      prog->_NumLinkedShaders++;
   }

   /* Here begins the inter-stage linking phase.  Some initial validation is
    * performed, then locations are assigned for uniforms, attributes, and
    * varyings.
    */
   if (cross_validate_uniforms(prog)) {
      /* Validate the inputs of each stage with the output of the preceding
       * stage.
       */
      for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) {
         if (!cross_validate_outputs_to_inputs(prog,
                                               prog->_LinkedShaders[i - 1],
                                               prog->_LinkedShaders[i]))
            goto done;
      }

      prog->LinkStatus = true;
   }

   /* FINISHME: Perform whole-program optimization here.
    */
   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      /* Optimization passes */
      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;

      /* Lowering */
      do_mat_op_to_vec(ir);
      do_mod_to_fract(ir);
      do_div_to_mul_rcp(ir);

      do {
         progress = false;

         progress = do_function_inlining(ir) || progress;
         progress = do_if_simplification(ir) || progress;
         progress = do_copy_propagation(ir) || progress;
         progress = do_dead_code_local(ir) || progress;
         progress = do_dead_code(ir) || progress;
         progress = do_constant_variable_unlinked(ir) || progress;
         progress = do_constant_folding(ir) || progress;
         progress = do_if_return(ir) || progress;
#if 0
         if (ctx->Shader.EmitNoIfs)
            progress = do_if_to_cond_assign(ir) || progress;
#endif

         progress = do_vec_index_to_swizzle(ir) || progress;
         /* Do this one after the previous to let the easier pass handle
          * constant vector indexing.
          */
         progress = do_vec_index_to_cond_assign(ir) || progress;

         progress = do_swizzle_swizzle(ir) || progress;
      } while (progress);
   }

   assign_uniform_locations(prog);

   if (prog->_LinkedShaders[0]->Type == GL_VERTEX_SHADER)
      /* FINISHME: The value of the max_attribute_index parameter is
       * FINISHME: implementation dependent based on the value of
       * FINISHME: GL_MAX_VERTEX_ATTRIBS.  GL_MAX_VERTEX_ATTRIBS must be
       * FINISHME: at least 16, so hardcode 16 for now.
       */
      if (!assign_attribute_locations(prog, 16))
         goto done;

   for (unsigned i = 1; i < prog->_NumLinkedShaders; i++)
      assign_varying_locations(prog->_LinkedShaders[i - 1],
                               prog->_LinkedShaders[i]);

   /* FINISHME: Assign fragment shader output locations. */

done:
   free(vert_shader_list);
}