1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 * DEALINGS IN THE SOFTWARE. 24 */ 25 26/** 27 * \file ir_to_mesa.cpp 28 * 29 * Translate GLSL IR to Mesa's gl_program representation. 
30 */ 31 32#include <stdio.h> 33#include "main/compiler.h" 34#include "main/macros.h" 35#include "main/mtypes.h" 36#include "main/shaderapi.h" 37#include "main/shaderobj.h" 38#include "main/uniforms.h" 39#include "compiler/glsl/ast.h" 40#include "compiler/glsl/ir.h" 41#include "compiler/glsl/ir_expression_flattening.h" 42#include "compiler/glsl/ir_visitor.h" 43#include "compiler/glsl/ir_optimization.h" 44#include "compiler/glsl/ir_uniform.h" 45#include "compiler/glsl/glsl_parser_extras.h" 46#include "compiler/glsl_types.h" 47#include "compiler/glsl/linker.h" 48#include "compiler/glsl/program.h" 49#include "program/prog_instruction.h" 50#include "program/prog_optimize.h" 51#include "program/prog_print.h" 52#include "program/program.h" 53#include "program/prog_parameter.h" 54#include "util/string_to_uint_map.h" 55 56 57static int swizzle_for_size(int size); 58 59namespace { 60 61class src_reg; 62class dst_reg; 63 64/** 65 * This struct is a corresponding struct to Mesa prog_src_register, with 66 * wider fields. 67 */ 68class src_reg { 69public: 70 src_reg(gl_register_file file, int index, const glsl_type *type) 71 { 72 this->file = file; 73 this->index = index; 74 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 75 this->swizzle = swizzle_for_size(type->vector_elements); 76 else 77 this->swizzle = SWIZZLE_XYZW; 78 this->negate = 0; 79 this->reladdr = NULL; 80 } 81 82 src_reg() 83 { 84 this->file = PROGRAM_UNDEFINED; 85 this->index = 0; 86 this->swizzle = 0; 87 this->negate = 0; 88 this->reladdr = NULL; 89 } 90 91 explicit src_reg(dst_reg reg); 92 93 gl_register_file file; /**< PROGRAM_* from Mesa */ 94 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 95 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 96 int negate; /**< NEGATE_XYZW mask from mesa */ 97 /** Register index should be offset by the integer in this reg. 
*/ 98 src_reg *reladdr; 99}; 100 101class dst_reg { 102public: 103 dst_reg(gl_register_file file, int writemask) 104 { 105 this->file = file; 106 this->index = 0; 107 this->writemask = writemask; 108 this->reladdr = NULL; 109 } 110 111 dst_reg() 112 { 113 this->file = PROGRAM_UNDEFINED; 114 this->index = 0; 115 this->writemask = 0; 116 this->reladdr = NULL; 117 } 118 119 explicit dst_reg(src_reg reg); 120 121 gl_register_file file; /**< PROGRAM_* from Mesa */ 122 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 123 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 124 /** Register index should be offset by the integer in this reg. */ 125 src_reg *reladdr; 126}; 127 128} /* anonymous namespace */ 129 130src_reg::src_reg(dst_reg reg) 131{ 132 this->file = reg.file; 133 this->index = reg.index; 134 this->swizzle = SWIZZLE_XYZW; 135 this->negate = 0; 136 this->reladdr = reg.reladdr; 137} 138 139dst_reg::dst_reg(src_reg reg) 140{ 141 this->file = reg.file; 142 this->index = reg.index; 143 this->writemask = WRITEMASK_XYZW; 144 this->reladdr = reg.reladdr; 145} 146 147namespace { 148 149class ir_to_mesa_instruction : public exec_node { 150public: 151 DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction) 152 153 enum prog_opcode op; 154 dst_reg dst; 155 src_reg src[3]; 156 /** Pointer to the ir source this tree came from for debugging */ 157 ir_instruction *ir; 158 bool saturate; 159 int sampler; /**< sampler index */ 160 int tex_target; /**< One of TEXTURE_*_INDEX */ 161 GLboolean tex_shadow; 162}; 163 164class variable_storage : public exec_node { 165public: 166 variable_storage(ir_variable *var, gl_register_file file, int index) 167 : file(file), index(index), var(var) 168 { 169 /* empty */ 170 } 171 172 gl_register_file file; 173 int index; 174 ir_variable *var; /* variable that maps to this, if any */ 175}; 176 177class function_entry : public exec_node { 178public: 179 ir_function_signature *sig; 180 181 /** 182 * identifier of this function 
signature used by the program. 183 * 184 * At the point that Mesa instructions for function calls are 185 * generated, we don't know the address of the first instruction of 186 * the function body. So we make the BranchTarget that is called a 187 * small integer and rewrite them during set_branchtargets(). 188 */ 189 int sig_id; 190 191 /** 192 * Pointer to first instruction of the function body. 193 * 194 * Set during function body emits after main() is processed. 195 */ 196 ir_to_mesa_instruction *bgn_inst; 197 198 /** 199 * Index of the first instruction of the function body in actual 200 * Mesa IR. 201 * 202 * Set after convertion from ir_to_mesa_instruction to prog_instruction. 203 */ 204 int inst; 205 206 /** Storage for the return value. */ 207 src_reg return_reg; 208}; 209 210class ir_to_mesa_visitor : public ir_visitor { 211public: 212 ir_to_mesa_visitor(); 213 ~ir_to_mesa_visitor(); 214 215 function_entry *current_function; 216 217 struct gl_context *ctx; 218 struct gl_program *prog; 219 struct gl_shader_program *shader_program; 220 struct gl_shader_compiler_options *options; 221 222 int next_temp; 223 224 variable_storage *find_variable_storage(const ir_variable *var); 225 226 src_reg get_temp(const glsl_type *type); 227 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); 228 229 src_reg src_reg_for_float(float val); 230 231 /** 232 * \name Visit methods 233 * 234 * As typical for the visitor pattern, there must be one \c visit method for 235 * each concrete subclass of \c ir_instruction. Virtual base classes within 236 * the hierarchy should not have \c visit methods. 
237 */ 238 /*@{*/ 239 virtual void visit(ir_variable *); 240 virtual void visit(ir_loop *); 241 virtual void visit(ir_loop_jump *); 242 virtual void visit(ir_function_signature *); 243 virtual void visit(ir_function *); 244 virtual void visit(ir_expression *); 245 virtual void visit(ir_swizzle *); 246 virtual void visit(ir_dereference_variable *); 247 virtual void visit(ir_dereference_array *); 248 virtual void visit(ir_dereference_record *); 249 virtual void visit(ir_assignment *); 250 virtual void visit(ir_constant *); 251 virtual void visit(ir_call *); 252 virtual void visit(ir_return *); 253 virtual void visit(ir_discard *); 254 virtual void visit(ir_texture *); 255 virtual void visit(ir_if *); 256 virtual void visit(ir_emit_vertex *); 257 virtual void visit(ir_end_primitive *); 258 virtual void visit(ir_barrier *); 259 /*@}*/ 260 261 src_reg result; 262 263 /** List of variable_storage */ 264 exec_list variables; 265 266 /** List of function_entry */ 267 exec_list function_signatures; 268 int next_signature_id; 269 270 /** List of ir_to_mesa_instruction */ 271 exec_list instructions; 272 273 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); 274 275 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 276 dst_reg dst, src_reg src0); 277 278 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 279 dst_reg dst, src_reg src0, src_reg src1); 280 281 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 282 dst_reg dst, 283 src_reg src0, src_reg src1, src_reg src2); 284 285 /** 286 * Emit the correct dot-product instruction for the type of arguments 287 */ 288 ir_to_mesa_instruction * emit_dp(ir_instruction *ir, 289 dst_reg dst, 290 src_reg src0, 291 src_reg src1, 292 unsigned elements); 293 294 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 295 dst_reg dst, src_reg src0); 296 297 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 298 dst_reg dst, src_reg src0, src_reg src1); 
299 300 bool try_emit_mad(ir_expression *ir, 301 int mul_operand); 302 bool try_emit_mad_for_and_not(ir_expression *ir, 303 int mul_operand); 304 305 void emit_swz(ir_expression *ir); 306 307 void emit_equality_comparison(ir_expression *ir, enum prog_opcode op, 308 dst_reg dst, 309 const src_reg &src0, const src_reg &src1); 310 311 inline void emit_sne(ir_expression *ir, dst_reg dst, 312 const src_reg &src0, const src_reg &src1) 313 { 314 emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1); 315 } 316 317 inline void emit_seq(ir_expression *ir, dst_reg dst, 318 const src_reg &src0, const src_reg &src1) 319 { 320 emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1); 321 } 322 323 bool process_move_condition(ir_rvalue *ir); 324 325 void copy_propagate(void); 326 327 void *mem_ctx; 328}; 329 330} /* anonymous namespace */ 331 332static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); 333 334static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); 335 336static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); 337 338static int 339swizzle_for_size(int size) 340{ 341 static const int size_swizzles[4] = { 342 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 343 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 344 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 345 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 346 }; 347 348 assert((size >= 1) && (size <= 4)); 349 return size_swizzles[size - 1]; 350} 351 352ir_to_mesa_instruction * 353ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 354 dst_reg dst, 355 src_reg src0, src_reg src1, src_reg src2) 356{ 357 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); 358 int num_reladdr = 0; 359 360 /* If we have to do relative addressing, we want to load the ARL 361 * reg directly for one of the regs, and preload the other reladdr 362 * sources into temps. 
363 */ 364 num_reladdr += dst.reladdr != NULL; 365 num_reladdr += src0.reladdr != NULL; 366 num_reladdr += src1.reladdr != NULL; 367 num_reladdr += src2.reladdr != NULL; 368 369 reladdr_to_temp(ir, &src2, &num_reladdr); 370 reladdr_to_temp(ir, &src1, &num_reladdr); 371 reladdr_to_temp(ir, &src0, &num_reladdr); 372 373 if (dst.reladdr) { 374 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); 375 num_reladdr--; 376 } 377 assert(num_reladdr == 0); 378 379 inst->op = op; 380 inst->dst = dst; 381 inst->src[0] = src0; 382 inst->src[1] = src1; 383 inst->src[2] = src2; 384 inst->ir = ir; 385 386 this->instructions.push_tail(inst); 387 388 return inst; 389} 390 391 392ir_to_mesa_instruction * 393ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 394 dst_reg dst, src_reg src0, src_reg src1) 395{ 396 return emit(ir, op, dst, src0, src1, undef_src); 397} 398 399ir_to_mesa_instruction * 400ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 401 dst_reg dst, src_reg src0) 402{ 403 assert(dst.writemask != 0); 404 return emit(ir, op, dst, src0, undef_src, undef_src); 405} 406 407ir_to_mesa_instruction * 408ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) 409{ 410 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 411} 412 413ir_to_mesa_instruction * 414ir_to_mesa_visitor::emit_dp(ir_instruction *ir, 415 dst_reg dst, src_reg src0, src_reg src1, 416 unsigned elements) 417{ 418 static const enum prog_opcode dot_opcodes[] = { 419 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 420 }; 421 422 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 423} 424 425/** 426 * Emits Mesa scalar opcodes to produce unique answers across channels. 427 * 428 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X 429 * channel determines the result across all channels. So to do a vec4 430 * of this operation, we want to emit a scalar per source channel used 431 * to produce dest channels. 
432 */ 433void 434ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 435 dst_reg dst, 436 src_reg orig_src0, src_reg orig_src1) 437{ 438 int i, j; 439 int done_mask = ~dst.writemask; 440 441 /* Mesa RCP is a scalar operation splatting results to all channels, 442 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 443 * dst channels. 444 */ 445 for (i = 0; i < 4; i++) { 446 GLuint this_mask = (1 << i); 447 ir_to_mesa_instruction *inst; 448 src_reg src0 = orig_src0; 449 src_reg src1 = orig_src1; 450 451 if (done_mask & this_mask) 452 continue; 453 454 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 455 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 456 for (j = i + 1; j < 4; j++) { 457 /* If there is another enabled component in the destination that is 458 * derived from the same inputs, generate its value on this pass as 459 * well. 460 */ 461 if (!(done_mask & (1 << j)) && 462 GET_SWZ(src0.swizzle, j) == src0_swiz && 463 GET_SWZ(src1.swizzle, j) == src1_swiz) { 464 this_mask |= (1 << j); 465 } 466 } 467 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 468 src0_swiz, src0_swiz); 469 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 470 src1_swiz, src1_swiz); 471 472 inst = emit(ir, op, dst, src0, src1); 473 inst->dst.writemask = this_mask; 474 done_mask |= this_mask; 475 } 476} 477 478void 479ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 480 dst_reg dst, src_reg src0) 481{ 482 src_reg undef = undef_src; 483 484 undef.swizzle = SWIZZLE_XXXX; 485 486 emit_scalar(ir, op, dst, src0, undef); 487} 488 489src_reg 490ir_to_mesa_visitor::src_reg_for_float(float val) 491{ 492 src_reg src(PROGRAM_CONSTANT, -1, NULL); 493 494 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 495 (const gl_constant_value *)&val, 1, &src.swizzle); 496 497 return src; 498} 499 500static int 501type_size(const struct glsl_type *type) 502{ 503 unsigned int i; 504 int size; 505 506 switch (type->base_type) { 507 case 
GLSL_TYPE_UINT: 508 case GLSL_TYPE_INT: 509 case GLSL_TYPE_FLOAT: 510 case GLSL_TYPE_BOOL: 511 if (type->is_matrix()) { 512 return type->matrix_columns; 513 } else { 514 /* Regardless of size of vector, it gets a vec4. This is bad 515 * packing for things like floats, but otherwise arrays become a 516 * mess. Hopefully a later pass over the code can pack scalars 517 * down if appropriate. 518 */ 519 return 1; 520 } 521 break; 522 case GLSL_TYPE_DOUBLE: 523 if (type->is_matrix()) { 524 if (type->vector_elements > 2) 525 return type->matrix_columns * 2; 526 else 527 return type->matrix_columns; 528 } else { 529 if (type->vector_elements > 2) 530 return 2; 531 else 532 return 1; 533 } 534 break; 535 case GLSL_TYPE_ARRAY: 536 assert(type->length > 0); 537 return type_size(type->fields.array) * type->length; 538 case GLSL_TYPE_STRUCT: 539 size = 0; 540 for (i = 0; i < type->length; i++) { 541 size += type_size(type->fields.structure[i].type); 542 } 543 return size; 544 case GLSL_TYPE_SAMPLER: 545 case GLSL_TYPE_IMAGE: 546 case GLSL_TYPE_SUBROUTINE: 547 /* Samplers take up one slot in UNIFORMS[], but they're baked in 548 * at link time. 549 */ 550 return 1; 551 case GLSL_TYPE_ATOMIC_UINT: 552 case GLSL_TYPE_VOID: 553 case GLSL_TYPE_ERROR: 554 case GLSL_TYPE_INTERFACE: 555 case GLSL_TYPE_FUNCTION: 556 assert(!"Invalid type in type_size"); 557 break; 558 } 559 560 return 0; 561} 562 563/** 564 * In the initial pass of codegen, we assign temporary numbers to 565 * intermediate results. (not SSA -- variable assignments will reuse 566 * storage). Actual register allocation for the Mesa VM occurs in a 567 * pass over the Mesa IR later. 
568 */ 569src_reg 570ir_to_mesa_visitor::get_temp(const glsl_type *type) 571{ 572 src_reg src; 573 574 src.file = PROGRAM_TEMPORARY; 575 src.index = next_temp; 576 src.reladdr = NULL; 577 next_temp += type_size(type); 578 579 if (type->is_array() || type->is_record()) { 580 src.swizzle = SWIZZLE_NOOP; 581 } else { 582 src.swizzle = swizzle_for_size(type->vector_elements); 583 } 584 src.negate = 0; 585 586 return src; 587} 588 589variable_storage * 590ir_to_mesa_visitor::find_variable_storage(const ir_variable *var) 591{ 592 foreach_in_list(variable_storage, entry, &this->variables) { 593 if (entry->var == var) 594 return entry; 595 } 596 597 return NULL; 598} 599 600void 601ir_to_mesa_visitor::visit(ir_variable *ir) 602{ 603 if (strcmp(ir->name, "gl_FragCoord") == 0) { 604 this->prog->OriginUpperLeft = ir->data.origin_upper_left; 605 this->prog->PixelCenterInteger = ir->data.pixel_center_integer; 606 } 607 608 if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 609 unsigned int i; 610 const ir_state_slot *const slots = ir->get_state_slots(); 611 assert(slots != NULL); 612 613 /* Check if this statevar's setup in the STATE file exactly 614 * matches how we'll want to reference it as a 615 * struct/array/whatever. If not, then we need to move it into 616 * temporary storage and hope that it'll get copy-propagated 617 * out. 618 */ 619 for (i = 0; i < ir->get_num_state_slots(); i++) { 620 if (slots[i].swizzle != SWIZZLE_XYZW) { 621 break; 622 } 623 } 624 625 variable_storage *storage; 626 dst_reg dst; 627 if (i == ir->get_num_state_slots()) { 628 /* We'll set the index later. */ 629 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 630 this->variables.push_tail(storage); 631 632 dst = undef_dst; 633 } else { 634 /* The variable_storage constructor allocates slots based on the size 635 * of the type. However, this had better match the number of state 636 * elements that we're going to copy into the new temporary. 
637 */ 638 assert((int) ir->get_num_state_slots() == type_size(ir->type)); 639 640 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 641 this->next_temp); 642 this->variables.push_tail(storage); 643 this->next_temp += type_size(ir->type); 644 645 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); 646 } 647 648 649 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { 650 int index = _mesa_add_state_reference(this->prog->Parameters, 651 (gl_state_index *)slots[i].tokens); 652 653 if (storage->file == PROGRAM_STATE_VAR) { 654 if (storage->index == -1) { 655 storage->index = index; 656 } else { 657 assert(index == storage->index + (int)i); 658 } 659 } else { 660 src_reg src(PROGRAM_STATE_VAR, index, NULL); 661 src.swizzle = slots[i].swizzle; 662 emit(ir, OPCODE_MOV, dst, src); 663 /* even a float takes up a whole vec4 reg in a struct/array. */ 664 dst.index++; 665 } 666 } 667 668 if (storage->file == PROGRAM_TEMPORARY && 669 dst.index != storage->index + (int) ir->get_num_state_slots()) { 670 linker_error(this->shader_program, 671 "failed to load builtin uniform `%s' " 672 "(%d/%d regs loaded)\n", 673 ir->name, dst.index - storage->index, 674 type_size(ir->type)); 675 } 676 } 677} 678 679void 680ir_to_mesa_visitor::visit(ir_loop *ir) 681{ 682 emit(NULL, OPCODE_BGNLOOP); 683 684 visit_exec_list(&ir->body_instructions, this); 685 686 emit(NULL, OPCODE_ENDLOOP); 687} 688 689void 690ir_to_mesa_visitor::visit(ir_loop_jump *ir) 691{ 692 switch (ir->mode) { 693 case ir_loop_jump::jump_break: 694 emit(NULL, OPCODE_BRK); 695 break; 696 case ir_loop_jump::jump_continue: 697 emit(NULL, OPCODE_CONT); 698 break; 699 } 700} 701 702 703void 704ir_to_mesa_visitor::visit(ir_function_signature *ir) 705{ 706 assert(0); 707 (void)ir; 708} 709 710void 711ir_to_mesa_visitor::visit(ir_function *ir) 712{ 713 /* Ignore function bodies other than main() -- we shouldn't see calls to 714 * them since they should all be inlined before we get to ir_to_mesa. 
715 */ 716 if (strcmp(ir->name, "main") == 0) { 717 const ir_function_signature *sig; 718 exec_list empty; 719 720 sig = ir->matching_signature(NULL, &empty, false); 721 722 assert(sig); 723 724 foreach_in_list(ir_instruction, ir, &sig->body) { 725 ir->accept(this); 726 } 727 } 728} 729 730bool 731ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 732{ 733 int nonmul_operand = 1 - mul_operand; 734 src_reg a, b, c; 735 736 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 737 if (!expr || expr->operation != ir_binop_mul) 738 return false; 739 740 expr->operands[0]->accept(this); 741 a = this->result; 742 expr->operands[1]->accept(this); 743 b = this->result; 744 ir->operands[nonmul_operand]->accept(this); 745 c = this->result; 746 747 this->result = get_temp(ir->type); 748 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); 749 750 return true; 751} 752 753/** 754 * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) 755 * 756 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 757 * implemented using multiplication, and logical-or is implemented using 758 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 759 * As result, the logical expression (a & !b) can be rewritten as: 760 * 761 * - a * !b 762 * - a * (1 - b) 763 * - (a * 1) - (a * b) 764 * - a + -(a * b) 765 * - a + (a * -b) 766 * 767 * This final expression can be implemented as a single MAD(a, -b, a) 768 * instruction. 
769 */ 770bool 771ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 772{ 773 const int other_operand = 1 - try_operand; 774 src_reg a, b; 775 776 ir_expression *expr = ir->operands[try_operand]->as_expression(); 777 if (!expr || expr->operation != ir_unop_logic_not) 778 return false; 779 780 ir->operands[other_operand]->accept(this); 781 a = this->result; 782 expr->operands[0]->accept(this); 783 b = this->result; 784 785 b.negate = ~b.negate; 786 787 this->result = get_temp(ir->type); 788 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); 789 790 return true; 791} 792 793void 794ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, 795 src_reg *reg, int *num_reladdr) 796{ 797 if (!reg->reladdr) 798 return; 799 800 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); 801 802 if (*num_reladdr != 1) { 803 src_reg temp = get_temp(glsl_type::vec4_type); 804 805 emit(ir, OPCODE_MOV, dst_reg(temp), *reg); 806 *reg = temp; 807 } 808 809 (*num_reladdr)--; 810} 811 812void 813ir_to_mesa_visitor::emit_swz(ir_expression *ir) 814{ 815 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 816 * This means that each of the operands is either an immediate value of -1, 817 * 0, or 1, or is a component from one source register (possibly with 818 * negation). 
819 */ 820 uint8_t components[4] = { 0 }; 821 bool negate[4] = { false }; 822 ir_variable *var = NULL; 823 824 for (unsigned i = 0; i < ir->type->vector_elements; i++) { 825 ir_rvalue *op = ir->operands[i]; 826 827 assert(op->type->is_scalar()); 828 829 while (op != NULL) { 830 switch (op->ir_type) { 831 case ir_type_constant: { 832 833 assert(op->type->is_scalar()); 834 835 const ir_constant *const c = op->as_constant(); 836 if (c->is_one()) { 837 components[i] = SWIZZLE_ONE; 838 } else if (c->is_zero()) { 839 components[i] = SWIZZLE_ZERO; 840 } else if (c->is_negative_one()) { 841 components[i] = SWIZZLE_ONE; 842 negate[i] = true; 843 } else { 844 assert(!"SWZ constant must be 0.0 or 1.0."); 845 } 846 847 op = NULL; 848 break; 849 } 850 851 case ir_type_dereference_variable: { 852 ir_dereference_variable *const deref = 853 (ir_dereference_variable *) op; 854 855 assert((var == NULL) || (deref->var == var)); 856 components[i] = SWIZZLE_X; 857 var = deref->var; 858 op = NULL; 859 break; 860 } 861 862 case ir_type_expression: { 863 ir_expression *const expr = (ir_expression *) op; 864 865 assert(expr->operation == ir_unop_neg); 866 negate[i] = true; 867 868 op = expr->operands[0]; 869 break; 870 } 871 872 case ir_type_swizzle: { 873 ir_swizzle *const swiz = (ir_swizzle *) op; 874 875 components[i] = swiz->mask.x; 876 op = swiz->val; 877 break; 878 } 879 880 default: 881 assert(!"Should not get here."); 882 return; 883 } 884 } 885 } 886 887 assert(var != NULL); 888 889 ir_dereference_variable *const deref = 890 new(mem_ctx) ir_dereference_variable(var); 891 892 this->result.file = PROGRAM_UNDEFINED; 893 deref->accept(this); 894 if (this->result.file == PROGRAM_UNDEFINED) { 895 printf("Failed to get tree for expression operand:\n"); 896 deref->print(); 897 printf("\n"); 898 exit(1); 899 } 900 901 src_reg src; 902 903 src = this->result; 904 src.swizzle = MAKE_SWIZZLE4(components[0], 905 components[1], 906 components[2], 907 components[3]); 908 src.negate = 
((unsigned(negate[0]) << 0) 909 | (unsigned(negate[1]) << 1) 910 | (unsigned(negate[2]) << 2) 911 | (unsigned(negate[3]) << 3)); 912 913 /* Storage for our result. Ideally for an assignment we'd be using the 914 * actual storage for the result here, instead. 915 */ 916 const src_reg result_src = get_temp(ir->type); 917 dst_reg result_dst = dst_reg(result_src); 918 919 /* Limit writes to the channels that will be used by result_src later. 920 * This does limit this temp's use as a temporary for multi-instruction 921 * sequences. 922 */ 923 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 924 925 emit(ir, OPCODE_SWZ, result_dst, src); 926 this->result = result_src; 927} 928 929void 930ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir, 931 enum prog_opcode op, 932 dst_reg dst, 933 const src_reg &src0, 934 const src_reg &src1) 935{ 936 src_reg difference; 937 src_reg abs_difference = get_temp(glsl_type::vec4_type); 938 const src_reg zero = src_reg_for_float(0.0); 939 940 /* x == y is equivalent to -abs(x-y) >= 0. Since all of the code that 941 * consumes the generated IR is pretty dumb, take special care when one 942 * of the operands is zero. 943 * 944 * Similarly, x != y is equivalent to -abs(x-y) < 0. 
945 */ 946 if (src0.file == zero.file && 947 src0.index == zero.index && 948 src0.swizzle == zero.swizzle) { 949 difference = src1; 950 } else if (src1.file == zero.file && 951 src1.index == zero.index && 952 src1.swizzle == zero.swizzle) { 953 difference = src0; 954 } else { 955 difference = get_temp(glsl_type::vec4_type); 956 957 src_reg tmp_src = src0; 958 tmp_src.negate = ~tmp_src.negate; 959 960 emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1); 961 } 962 963 emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference); 964 965 abs_difference.negate = ~abs_difference.negate; 966 emit(ir, op, dst, abs_difference, zero); 967} 968 969void 970ir_to_mesa_visitor::visit(ir_expression *ir) 971{ 972 unsigned int operand; 973 src_reg op[ARRAY_SIZE(ir->operands)]; 974 src_reg result_src; 975 dst_reg result_dst; 976 977 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) 978 */ 979 if (ir->operation == ir_binop_add) { 980 if (try_emit_mad(ir, 1)) 981 return; 982 if (try_emit_mad(ir, 0)) 983 return; 984 } 985 986 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 987 */ 988 if (ir->operation == ir_binop_logic_and) { 989 if (try_emit_mad_for_and_not(ir, 1)) 990 return; 991 if (try_emit_mad_for_and_not(ir, 0)) 992 return; 993 } 994 995 if (ir->operation == ir_quadop_vector) { 996 this->emit_swz(ir); 997 return; 998 } 999 1000 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1001 this->result.file = PROGRAM_UNDEFINED; 1002 ir->operands[operand]->accept(this); 1003 if (this->result.file == PROGRAM_UNDEFINED) { 1004 printf("Failed to get tree for expression operand:\n"); 1005 ir->operands[operand]->print(); 1006 printf("\n"); 1007 exit(1); 1008 } 1009 op[operand] = this->result; 1010 1011 /* Matrix expression operands should have been broken down to vector 1012 * operations already. 
1013 */ 1014 assert(!ir->operands[operand]->type->is_matrix()); 1015 } 1016 1017 int vector_elements = ir->operands[0]->type->vector_elements; 1018 if (ir->operands[1]) { 1019 vector_elements = MAX2(vector_elements, 1020 ir->operands[1]->type->vector_elements); 1021 } 1022 1023 this->result.file = PROGRAM_UNDEFINED; 1024 1025 /* Storage for our result. Ideally for an assignment we'd be using 1026 * the actual storage for the result here, instead. 1027 */ 1028 result_src = get_temp(ir->type); 1029 /* convenience for the emit functions below. */ 1030 result_dst = dst_reg(result_src); 1031 /* Limit writes to the channels that will be used by result_src later. 1032 * This does limit this temp's use as a temporary for multi-instruction 1033 * sequences. 1034 */ 1035 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1036 1037 switch (ir->operation) { 1038 case ir_unop_logic_not: 1039 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1040 * older GPUs implement SEQ using multiple instructions (i915 uses two 1041 * SGE instructions and a MUL instruction). Since our logic values are 1042 * 0.0 and 1.0, 1-x also implements !x. 
1043 */ 1044 op[0].negate = ~op[0].negate; 1045 emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); 1046 break; 1047 case ir_unop_neg: 1048 op[0].negate = ~op[0].negate; 1049 result_src = op[0]; 1050 break; 1051 case ir_unop_abs: 1052 emit(ir, OPCODE_ABS, result_dst, op[0]); 1053 break; 1054 case ir_unop_sign: 1055 emit(ir, OPCODE_SSG, result_dst, op[0]); 1056 break; 1057 case ir_unop_rcp: 1058 emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); 1059 break; 1060 1061 case ir_unop_exp2: 1062 emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); 1063 break; 1064 case ir_unop_exp: 1065 case ir_unop_log: 1066 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1067 break; 1068 case ir_unop_log2: 1069 emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); 1070 break; 1071 case ir_unop_sin: 1072 emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); 1073 break; 1074 case ir_unop_cos: 1075 emit_scalar(ir, OPCODE_COS, result_dst, op[0]); 1076 break; 1077 1078 case ir_unop_dFdx: 1079 emit(ir, OPCODE_DDX, result_dst, op[0]); 1080 break; 1081 case ir_unop_dFdy: 1082 emit(ir, OPCODE_DDY, result_dst, op[0]); 1083 break; 1084 1085 case ir_unop_saturate: { 1086 ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, 1087 result_dst, op[0]); 1088 inst->saturate = true; 1089 break; 1090 } 1091 case ir_unop_noise: { 1092 const enum prog_opcode opcode = 1093 prog_opcode(OPCODE_NOISE1 1094 + (ir->operands[0]->type->vector_elements) - 1); 1095 assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); 1096 1097 emit(ir, opcode, result_dst, op[0]); 1098 break; 1099 } 1100 1101 case ir_binop_add: 1102 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1103 break; 1104 case ir_binop_sub: 1105 emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); 1106 break; 1107 1108 case ir_binop_mul: 1109 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1110 break; 1111 case ir_binop_div: 1112 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1113 break; 1114 case ir_binop_mod: 1115 /* Floating 
point should be lowered by MOD_TO_FLOOR in the compiler. */ 1116 assert(ir->type->is_integer()); 1117 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1118 break; 1119 1120 case ir_binop_less: 1121 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); 1122 break; 1123 case ir_binop_greater: 1124 /* Negating the operands (as opposed to switching the order of the 1125 * operands) produces the correct result when both are +/-Inf. 1126 */ 1127 op[0].negate = ~op[0].negate; 1128 op[1].negate = ~op[1].negate; 1129 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); 1130 break; 1131 case ir_binop_lequal: 1132 /* Negating the operands (as opposed to switching the order of the 1133 * operands) produces the correct result when both are +/-Inf. 1134 */ 1135 op[0].negate = ~op[0].negate; 1136 op[1].negate = ~op[1].negate; 1137 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); 1138 break; 1139 case ir_binop_gequal: 1140 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); 1141 break; 1142 case ir_binop_equal: 1143 emit_seq(ir, result_dst, op[0], op[1]); 1144 break; 1145 case ir_binop_nequal: 1146 emit_sne(ir, result_dst, op[0], op[1]); 1147 break; 1148 case ir_binop_all_equal: 1149 /* "==" operator producing a scalar boolean. */ 1150 if (ir->operands[0]->type->is_vector() || 1151 ir->operands[1]->type->is_vector()) { 1152 src_reg temp = get_temp(glsl_type::vec4_type); 1153 emit_sne(ir, dst_reg(temp), op[0], op[1]); 1154 1155 /* After the dot-product, the value will be an integer on the 1156 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1157 */ 1158 emit_dp(ir, result_dst, temp, temp, vector_elements); 1159 1160 /* Negating the result of the dot-product gives values on the range 1161 * [-4, 0]. Zero becomes 1.0, and negative values become zero. This 1162 * achieved using SGE. 
1163 */ 1164 src_reg sge_src = result_src; 1165 sge_src.negate = ~sge_src.negate; 1166 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); 1167 } else { 1168 emit_seq(ir, result_dst, op[0], op[1]); 1169 } 1170 break; 1171 case ir_binop_any_nequal: 1172 /* "!=" operator producing a scalar boolean. */ 1173 if (ir->operands[0]->type->is_vector() || 1174 ir->operands[1]->type->is_vector()) { 1175 src_reg temp = get_temp(glsl_type::vec4_type); 1176 if (ir->operands[0]->type->is_boolean() && 1177 ir->operands[1]->as_constant() && 1178 ir->operands[1]->as_constant()->is_zero()) { 1179 temp = op[0]; 1180 } else { 1181 emit_sne(ir, dst_reg(temp), op[0], op[1]); 1182 } 1183 1184 /* After the dot-product, the value will be an integer on the 1185 * range [0,4]. Zero stays zero, and positive values become 1.0. 1186 */ 1187 ir_to_mesa_instruction *const dp = 1188 emit_dp(ir, result_dst, temp, temp, vector_elements); 1189 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1190 /* The clamping to [0,1] can be done for free in the fragment 1191 * shader with a saturate. 1192 */ 1193 dp->saturate = true; 1194 } else { 1195 /* Negating the result of the dot-product gives values on the range 1196 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1197 * achieved using SLT. 1198 */ 1199 src_reg slt_src = result_src; 1200 slt_src.negate = ~slt_src.negate; 1201 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); 1202 } 1203 } else { 1204 emit_sne(ir, result_dst, op[0], op[1]); 1205 } 1206 break; 1207 1208 case ir_binop_logic_xor: 1209 emit_sne(ir, result_dst, op[0], op[1]); 1210 break; 1211 1212 case ir_binop_logic_or: { 1213 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1214 /* After the addition, the value will be an integer on the 1215 * range [0,2]. Zero stays zero, and positive values become 1.0. 
1216 */ 1217 ir_to_mesa_instruction *add = 1218 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1219 add->saturate = true; 1220 } else { 1221 /* The Boolean arguments are stored as float 0.0 and 1.0. If either 1222 * value is 1.0, the result of the logcal-or should be 1.0. If both 1223 * values are 0.0, the result should be 0.0. This is exactly what 1224 * MAX does. 1225 */ 1226 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1227 } 1228 break; 1229 } 1230 1231 case ir_binop_logic_and: 1232 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ 1233 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1234 break; 1235 1236 case ir_binop_dot: 1237 assert(ir->operands[0]->type->is_vector()); 1238 assert(ir->operands[0]->type == ir->operands[1]->type); 1239 emit_dp(ir, result_dst, op[0], op[1], 1240 ir->operands[0]->type->vector_elements); 1241 break; 1242 1243 case ir_unop_sqrt: 1244 /* sqrt(x) = x * rsq(x). */ 1245 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1246 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); 1247 /* For incoming channels <= 0, set the result to 0. */ 1248 op[0].negate = ~op[0].negate; 1249 emit(ir, OPCODE_CMP, result_dst, 1250 op[0], result_src, src_reg_for_float(0.0)); 1251 break; 1252 case ir_unop_rsq: 1253 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1254 break; 1255 case ir_unop_i2f: 1256 case ir_unop_u2f: 1257 case ir_unop_b2f: 1258 case ir_unop_b2i: 1259 case ir_unop_i2u: 1260 case ir_unop_u2i: 1261 /* Mesa IR lacks types, ints are stored as truncated floats. 
*/ 1262 result_src = op[0]; 1263 break; 1264 case ir_unop_f2i: 1265 case ir_unop_f2u: 1266 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1267 break; 1268 case ir_unop_f2b: 1269 case ir_unop_i2b: 1270 emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0)); 1271 break; 1272 case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway 1273 case ir_unop_bitcast_f2u: 1274 case ir_unop_bitcast_i2f: 1275 case ir_unop_bitcast_u2f: 1276 break; 1277 case ir_unop_trunc: 1278 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1279 break; 1280 case ir_unop_ceil: 1281 op[0].negate = ~op[0].negate; 1282 emit(ir, OPCODE_FLR, result_dst, op[0]); 1283 result_src.negate = ~result_src.negate; 1284 break; 1285 case ir_unop_floor: 1286 emit(ir, OPCODE_FLR, result_dst, op[0]); 1287 break; 1288 case ir_unop_fract: 1289 emit(ir, OPCODE_FRC, result_dst, op[0]); 1290 break; 1291 case ir_unop_pack_snorm_2x16: 1292 case ir_unop_pack_snorm_4x8: 1293 case ir_unop_pack_unorm_2x16: 1294 case ir_unop_pack_unorm_4x8: 1295 case ir_unop_pack_half_2x16: 1296 case ir_unop_pack_double_2x32: 1297 case ir_unop_unpack_snorm_2x16: 1298 case ir_unop_unpack_snorm_4x8: 1299 case ir_unop_unpack_unorm_2x16: 1300 case ir_unop_unpack_unorm_4x8: 1301 case ir_unop_unpack_half_2x16: 1302 case ir_unop_unpack_double_2x32: 1303 case ir_unop_bitfield_reverse: 1304 case ir_unop_bit_count: 1305 case ir_unop_find_msb: 1306 case ir_unop_find_lsb: 1307 case ir_unop_d2f: 1308 case ir_unop_f2d: 1309 case ir_unop_d2i: 1310 case ir_unop_i2d: 1311 case ir_unop_d2u: 1312 case ir_unop_u2d: 1313 case ir_unop_d2b: 1314 case ir_unop_frexp_sig: 1315 case ir_unop_frexp_exp: 1316 assert(!"not supported"); 1317 break; 1318 case ir_binop_min: 1319 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); 1320 break; 1321 case ir_binop_max: 1322 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1323 break; 1324 case ir_binop_pow: 1325 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); 1326 break; 1327 1328 /* GLSL 1.30 integer ops are unsupported 
in Mesa IR, but since 1329 * hardware backends have no way to avoid Mesa IR generation 1330 * even if they don't use it, we need to emit "something" and 1331 * continue. 1332 */ 1333 case ir_binop_lshift: 1334 case ir_binop_rshift: 1335 case ir_binop_bit_and: 1336 case ir_binop_bit_xor: 1337 case ir_binop_bit_or: 1338 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1339 break; 1340 1341 case ir_unop_bit_not: 1342 case ir_unop_round_even: 1343 emit(ir, OPCODE_MOV, result_dst, op[0]); 1344 break; 1345 1346 case ir_binop_ubo_load: 1347 assert(!"not supported"); 1348 break; 1349 1350 case ir_triop_lrp: 1351 /* ir_triop_lrp operands are (x, y, a) while 1352 * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program. 1353 */ 1354 emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); 1355 break; 1356 1357 case ir_binop_vector_extract: 1358 case ir_triop_fma: 1359 case ir_triop_bitfield_extract: 1360 case ir_triop_vector_insert: 1361 case ir_quadop_bitfield_insert: 1362 case ir_binop_ldexp: 1363 case ir_triop_csel: 1364 case ir_binop_carry: 1365 case ir_binop_borrow: 1366 case ir_binop_imul_high: 1367 case ir_unop_interpolate_at_centroid: 1368 case ir_binop_interpolate_at_offset: 1369 case ir_binop_interpolate_at_sample: 1370 case ir_unop_dFdx_coarse: 1371 case ir_unop_dFdx_fine: 1372 case ir_unop_dFdy_coarse: 1373 case ir_unop_dFdy_fine: 1374 case ir_unop_subroutine_to_int: 1375 case ir_unop_get_buffer_size: 1376 case ir_unop_vote_any: 1377 case ir_unop_vote_all: 1378 case ir_unop_vote_eq: 1379 assert(!"not supported"); 1380 break; 1381 1382 case ir_unop_ssbo_unsized_array_length: 1383 case ir_quadop_vector: 1384 /* This operation should have already been handled. 
1385 */ 1386 assert(!"Should not get here."); 1387 break; 1388 } 1389 1390 this->result = result_src; 1391} 1392 1393 1394void 1395ir_to_mesa_visitor::visit(ir_swizzle *ir) 1396{ 1397 src_reg src; 1398 int i; 1399 int swizzle[4]; 1400 1401 /* Note that this is only swizzles in expressions, not those on the left 1402 * hand side of an assignment, which do write masking. See ir_assignment 1403 * for that. 1404 */ 1405 1406 ir->val->accept(this); 1407 src = this->result; 1408 assert(src.file != PROGRAM_UNDEFINED); 1409 assert(ir->type->vector_elements > 0); 1410 1411 for (i = 0; i < 4; i++) { 1412 if (i < ir->type->vector_elements) { 1413 switch (i) { 1414 case 0: 1415 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1416 break; 1417 case 1: 1418 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1419 break; 1420 case 2: 1421 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1422 break; 1423 case 3: 1424 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1425 break; 1426 } 1427 } else { 1428 /* If the type is smaller than a vec4, replicate the last 1429 * channel out. 1430 */ 1431 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1432 } 1433 } 1434 1435 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1436 1437 this->result = src; 1438} 1439 1440void 1441ir_to_mesa_visitor::visit(ir_dereference_variable *ir) 1442{ 1443 variable_storage *entry = find_variable_storage(ir->var); 1444 ir_variable *var = ir->var; 1445 1446 if (!entry) { 1447 switch (var->data.mode) { 1448 case ir_var_uniform: 1449 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1450 var->data.param_index); 1451 this->variables.push_tail(entry); 1452 break; 1453 case ir_var_shader_in: 1454 /* The linker assigns locations for varyings and attributes, 1455 * including deprecated builtins (like gl_Color), 1456 * user-assigned generic attributes (glBindVertexLocation), 1457 * and user-defined varyings. 
1458 */ 1459 assert(var->data.location != -1); 1460 entry = new(mem_ctx) variable_storage(var, 1461 PROGRAM_INPUT, 1462 var->data.location); 1463 break; 1464 case ir_var_shader_out: 1465 assert(var->data.location != -1); 1466 entry = new(mem_ctx) variable_storage(var, 1467 PROGRAM_OUTPUT, 1468 var->data.location); 1469 break; 1470 case ir_var_system_value: 1471 entry = new(mem_ctx) variable_storage(var, 1472 PROGRAM_SYSTEM_VALUE, 1473 var->data.location); 1474 break; 1475 case ir_var_auto: 1476 case ir_var_temporary: 1477 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1478 this->next_temp); 1479 this->variables.push_tail(entry); 1480 1481 next_temp += type_size(var->type); 1482 break; 1483 } 1484 1485 if (!entry) { 1486 printf("Failed to make storage for %s\n", var->name); 1487 exit(1); 1488 } 1489 } 1490 1491 this->result = src_reg(entry->file, entry->index, var->type); 1492} 1493 1494void 1495ir_to_mesa_visitor::visit(ir_dereference_array *ir) 1496{ 1497 ir_constant *index; 1498 src_reg src; 1499 int element_size = type_size(ir->type); 1500 1501 index = ir->array_index->constant_expression_value(); 1502 1503 ir->array->accept(this); 1504 src = this->result; 1505 1506 if (index) { 1507 src.index += index->value.i[0] * element_size; 1508 } else { 1509 /* Variable index array dereference. It eats the "vec4" of the 1510 * base of the array and an index that offsets the Mesa register 1511 * index. 1512 */ 1513 ir->array_index->accept(this); 1514 1515 src_reg index_reg; 1516 1517 if (element_size == 1) { 1518 index_reg = this->result; 1519 } else { 1520 index_reg = get_temp(glsl_type::float_type); 1521 1522 emit(ir, OPCODE_MUL, dst_reg(index_reg), 1523 this->result, src_reg_for_float(element_size)); 1524 } 1525 1526 /* If there was already a relative address register involved, add the 1527 * new and the old together to get the new offset. 
1528 */ 1529 if (src.reladdr != NULL) { 1530 src_reg accum_reg = get_temp(glsl_type::float_type); 1531 1532 emit(ir, OPCODE_ADD, dst_reg(accum_reg), 1533 index_reg, *src.reladdr); 1534 1535 index_reg = accum_reg; 1536 } 1537 1538 src.reladdr = ralloc(mem_ctx, src_reg); 1539 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1540 } 1541 1542 /* If the type is smaller than a vec4, replicate the last channel out. */ 1543 if (ir->type->is_scalar() || ir->type->is_vector()) 1544 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1545 else 1546 src.swizzle = SWIZZLE_NOOP; 1547 1548 this->result = src; 1549} 1550 1551void 1552ir_to_mesa_visitor::visit(ir_dereference_record *ir) 1553{ 1554 unsigned int i; 1555 const glsl_type *struct_type = ir->record->type; 1556 int offset = 0; 1557 1558 ir->record->accept(this); 1559 1560 for (i = 0; i < struct_type->length; i++) { 1561 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1562 break; 1563 offset += type_size(struct_type->fields.structure[i].type); 1564 } 1565 1566 /* If the type is smaller than a vec4, replicate the last channel out. */ 1567 if (ir->type->is_scalar() || ir->type->is_vector()) 1568 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1569 else 1570 this->result.swizzle = SWIZZLE_NOOP; 1571 1572 this->result.index += offset; 1573} 1574 1575/** 1576 * We want to be careful in assignment setup to hit the actual storage 1577 * instead of potentially using a temporary like we might with the 1578 * ir_dereference handler. 1579 */ 1580static dst_reg 1581get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) 1582{ 1583 /* The LHS must be a dereference. If the LHS is a variable indexed array 1584 * access of a vector, it must be separated into a series conditional moves 1585 * before reaching this point (see ir_vec_index_to_cond_assign). 
1586 */ 1587 assert(ir->as_dereference()); 1588 ir_dereference_array *deref_array = ir->as_dereference_array(); 1589 if (deref_array) { 1590 assert(!deref_array->array->type->is_vector()); 1591 } 1592 1593 /* Use the rvalue deref handler for the most part. We'll ignore 1594 * swizzles in it and write swizzles using writemask, though. 1595 */ 1596 ir->accept(v); 1597 return dst_reg(v->result); 1598} 1599 1600/* Calculate the sampler index and also calculate the base uniform location 1601 * for struct members. 1602 */ 1603static void 1604calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref, 1605 unsigned *offset, unsigned *array_elements, 1606 unsigned *location) 1607{ 1608 if (deref->ir_type == ir_type_dereference_variable) 1609 return; 1610 1611 switch (deref->ir_type) { 1612 case ir_type_dereference_array: { 1613 ir_dereference_array *deref_arr = deref->as_dereference_array(); 1614 ir_constant *array_index = 1615 deref_arr->array_index->constant_expression_value(); 1616 1617 if (!array_index) { 1618 /* GLSL 1.10 and 1.20 allowed variable sampler array indices, 1619 * while GLSL 1.30 requires that the array indices be 1620 * constant integer expressions. We don't expect any driver 1621 * to actually work with a really variable array index, so 1622 * all that would work would be an unrolled loop counter that ends 1623 * up being constant above. 
1624 */ 1625 ralloc_strcat(&prog->data->InfoLog, 1626 "warning: Variable sampler array index unsupported.\n" 1627 "This feature of the language was removed in GLSL 1.20 " 1628 "and is unlikely to be supported for 1.10 in Mesa.\n"); 1629 } else { 1630 *offset += array_index->value.u[0] * *array_elements; 1631 } 1632 1633 *array_elements *= deref_arr->array->type->length; 1634 1635 calc_sampler_offsets(prog, deref_arr->array->as_dereference(), 1636 offset, array_elements, location); 1637 break; 1638 } 1639 1640 case ir_type_dereference_record: { 1641 ir_dereference_record *deref_record = deref->as_dereference_record(); 1642 unsigned field_index = 1643 deref_record->record->type->field_index(deref_record->field); 1644 *location += 1645 deref_record->record->type->record_location_offset(field_index); 1646 calc_sampler_offsets(prog, deref_record->record->as_dereference(), 1647 offset, array_elements, location); 1648 break; 1649 } 1650 1651 default: 1652 unreachable("Invalid deref type"); 1653 break; 1654 } 1655} 1656 1657static int 1658get_sampler_uniform_value(class ir_dereference *sampler, 1659 struct gl_shader_program *shader_program, 1660 const struct gl_program *prog) 1661{ 1662 GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); 1663 ir_variable *var = sampler->variable_referenced(); 1664 unsigned location = var->data.location; 1665 unsigned array_elements = 1; 1666 unsigned offset = 0; 1667 1668 calc_sampler_offsets(shader_program, sampler, &offset, &array_elements, 1669 &location); 1670 1671 assert(shader_program->data->UniformStorage[location].opaque[shader].active); 1672 return shader_program->data->UniformStorage[location].opaque[shader].index + 1673 offset; 1674} 1675 1676/** 1677 * Process the condition of a conditional assignment 1678 * 1679 * Examines the condition of a conditional assignment to generate the optimal 1680 * first operand of a \c CMP instruction. 
If the condition is a relational 1681 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1682 * used as the source for the \c CMP instruction. Otherwise the comparison 1683 * is processed to a boolean result, and the boolean result is used as the 1684 * operand to the CMP instruction. 1685 */ 1686bool 1687ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir) 1688{ 1689 ir_rvalue *src_ir = ir; 1690 bool negate = true; 1691 bool switch_order = false; 1692 1693 ir_expression *const expr = ir->as_expression(); 1694 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 1695 bool zero_on_left = false; 1696 1697 if (expr->operands[0]->is_zero()) { 1698 src_ir = expr->operands[1]; 1699 zero_on_left = true; 1700 } else if (expr->operands[1]->is_zero()) { 1701 src_ir = expr->operands[0]; 1702 zero_on_left = false; 1703 } 1704 1705 /* a is - 0 + - 0 + 1706 * (a < 0) T F F ( a < 0) T F F 1707 * (0 < a) F F T (-a < 0) F F T 1708 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 1709 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 1710 * (a > 0) F F T (-a < 0) F F T 1711 * (0 > a) T F F ( a < 0) T F F 1712 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 1713 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 1714 * 1715 * Note that exchanging the order of 0 and 'a' in the comparison simply 1716 * means that the value of 'a' should be negated. 
1717 */ 1718 if (src_ir != ir) { 1719 switch (expr->operation) { 1720 case ir_binop_less: 1721 switch_order = false; 1722 negate = zero_on_left; 1723 break; 1724 1725 case ir_binop_greater: 1726 switch_order = false; 1727 negate = !zero_on_left; 1728 break; 1729 1730 case ir_binop_lequal: 1731 switch_order = true; 1732 negate = !zero_on_left; 1733 break; 1734 1735 case ir_binop_gequal: 1736 switch_order = true; 1737 negate = zero_on_left; 1738 break; 1739 1740 default: 1741 /* This isn't the right kind of comparison afterall, so make sure 1742 * the whole condition is visited. 1743 */ 1744 src_ir = ir; 1745 break; 1746 } 1747 } 1748 } 1749 1750 src_ir->accept(this); 1751 1752 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 1753 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 1754 * choose which value OPCODE_CMP produces without an extra instruction 1755 * computing the condition. 1756 */ 1757 if (negate) 1758 this->result.negate = ~this->result.negate; 1759 1760 return switch_order; 1761} 1762 1763void 1764ir_to_mesa_visitor::visit(ir_assignment *ir) 1765{ 1766 dst_reg l; 1767 src_reg r; 1768 int i; 1769 1770 ir->rhs->accept(this); 1771 r = this->result; 1772 1773 l = get_assignment_lhs(ir->lhs, this); 1774 1775 /* FINISHME: This should really set to the correct maximal writemask for each 1776 * FINISHME: component written (in the loops below). This case can only 1777 * FINISHME: occur for matrices, arrays, and structures. 1778 */ 1779 if (ir->write_mask == 0) { 1780 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1781 l.writemask = WRITEMASK_XYZW; 1782 } else if (ir->lhs->type->is_scalar()) { 1783 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 1784 * FINISHME: W component of fragment shader output zero, work correctly. 
1785 */ 1786 l.writemask = WRITEMASK_XYZW; 1787 } else { 1788 int swizzles[4]; 1789 int first_enabled_chan = 0; 1790 int rhs_chan = 0; 1791 1792 assert(ir->lhs->type->is_vector()); 1793 l.writemask = ir->write_mask; 1794 1795 for (int i = 0; i < 4; i++) { 1796 if (l.writemask & (1 << i)) { 1797 first_enabled_chan = GET_SWZ(r.swizzle, i); 1798 break; 1799 } 1800 } 1801 1802 /* Swizzle a small RHS vector into the channels being written. 1803 * 1804 * glsl ir treats write_mask as dictating how many channels are 1805 * present on the RHS while Mesa IR treats write_mask as just 1806 * showing which channels of the vec4 RHS get written. 1807 */ 1808 for (int i = 0; i < 4; i++) { 1809 if (l.writemask & (1 << i)) 1810 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1811 else 1812 swizzles[i] = first_enabled_chan; 1813 } 1814 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1815 swizzles[2], swizzles[3]); 1816 } 1817 1818 assert(l.file != PROGRAM_UNDEFINED); 1819 assert(r.file != PROGRAM_UNDEFINED); 1820 1821 if (ir->condition) { 1822 const bool switch_order = this->process_move_condition(ir->condition); 1823 src_reg condition = this->result; 1824 1825 for (i = 0; i < type_size(ir->lhs->type); i++) { 1826 if (switch_order) { 1827 emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); 1828 } else { 1829 emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); 1830 } 1831 1832 l.index++; 1833 r.index++; 1834 } 1835 } else { 1836 for (i = 0; i < type_size(ir->lhs->type); i++) { 1837 emit(ir, OPCODE_MOV, l, r); 1838 l.index++; 1839 r.index++; 1840 } 1841 } 1842} 1843 1844 1845void 1846ir_to_mesa_visitor::visit(ir_constant *ir) 1847{ 1848 src_reg src; 1849 GLfloat stack_vals[4] = { 0 }; 1850 GLfloat *values = stack_vals; 1851 unsigned int i; 1852 1853 /* Unfortunately, 4 floats is all we can get into 1854 * _mesa_add_unnamed_constant. So, make a temp to store an 1855 * aggregate constant and move each constant value into it. 
If we 1856 * get lucky, copy propagation will eliminate the extra moves. 1857 */ 1858 1859 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1860 src_reg temp_base = get_temp(ir->type); 1861 dst_reg temp = dst_reg(temp_base); 1862 1863 foreach_in_list(ir_constant, field_value, &ir->components) { 1864 int size = type_size(field_value->type); 1865 1866 assert(size > 0); 1867 1868 field_value->accept(this); 1869 src = this->result; 1870 1871 for (i = 0; i < (unsigned int)size; i++) { 1872 emit(ir, OPCODE_MOV, temp, src); 1873 1874 src.index++; 1875 temp.index++; 1876 } 1877 } 1878 this->result = temp_base; 1879 return; 1880 } 1881 1882 if (ir->type->is_array()) { 1883 src_reg temp_base = get_temp(ir->type); 1884 dst_reg temp = dst_reg(temp_base); 1885 int size = type_size(ir->type->fields.array); 1886 1887 assert(size > 0); 1888 1889 for (i = 0; i < ir->type->length; i++) { 1890 ir->array_elements[i]->accept(this); 1891 src = this->result; 1892 for (int j = 0; j < size; j++) { 1893 emit(ir, OPCODE_MOV, temp, src); 1894 1895 src.index++; 1896 temp.index++; 1897 } 1898 } 1899 this->result = temp_base; 1900 return; 1901 } 1902 1903 if (ir->type->is_matrix()) { 1904 src_reg mat = get_temp(ir->type); 1905 dst_reg mat_column = dst_reg(mat); 1906 1907 for (i = 0; i < ir->type->matrix_columns; i++) { 1908 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1909 values = &ir->value.f[i * ir->type->vector_elements]; 1910 1911 src = src_reg(PROGRAM_CONSTANT, -1, NULL); 1912 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1913 (gl_constant_value *) values, 1914 ir->type->vector_elements, 1915 &src.swizzle); 1916 emit(ir, OPCODE_MOV, mat_column, src); 1917 1918 mat_column.index++; 1919 } 1920 1921 this->result = mat; 1922 return; 1923 } 1924 1925 src.file = PROGRAM_CONSTANT; 1926 switch (ir->type->base_type) { 1927 case GLSL_TYPE_FLOAT: 1928 values = &ir->value.f[0]; 1929 break; 1930 case GLSL_TYPE_UINT: 1931 for (i = 0; i < ir->type->vector_elements; i++) { 1932 values[i] 
= ir->value.u[i]; 1933 } 1934 break; 1935 case GLSL_TYPE_INT: 1936 for (i = 0; i < ir->type->vector_elements; i++) { 1937 values[i] = ir->value.i[i]; 1938 } 1939 break; 1940 case GLSL_TYPE_BOOL: 1941 for (i = 0; i < ir->type->vector_elements; i++) { 1942 values[i] = ir->value.b[i]; 1943 } 1944 break; 1945 default: 1946 assert(!"Non-float/uint/int/bool constant"); 1947 } 1948 1949 this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); 1950 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1951 (gl_constant_value *) values, 1952 ir->type->vector_elements, 1953 &this->result.swizzle); 1954} 1955 1956void 1957ir_to_mesa_visitor::visit(ir_call *) 1958{ 1959 assert(!"ir_to_mesa: All function calls should have been inlined by now."); 1960} 1961 1962void 1963ir_to_mesa_visitor::visit(ir_texture *ir) 1964{ 1965 src_reg result_src, coord, lod_info, projector, dx, dy; 1966 dst_reg result_dst, coord_dst; 1967 ir_to_mesa_instruction *inst = NULL; 1968 prog_opcode opcode = OPCODE_NOP; 1969 1970 if (ir->op == ir_txs) 1971 this->result = src_reg_for_float(0.0); 1972 else 1973 ir->coordinate->accept(this); 1974 1975 /* Put our coords in a temp. We'll need to modify them for shadow, 1976 * projection, or LOD, so the only case we'd use it as-is is if 1977 * we're doing plain old texturing. Mesa IR optimization should 1978 * handle cleaning up our mess in that case. 1979 */ 1980 coord = get_temp(glsl_type::vec4_type); 1981 coord_dst = dst_reg(coord); 1982 emit(ir, OPCODE_MOV, coord_dst, this->result); 1983 1984 if (ir->projector) { 1985 ir->projector->accept(this); 1986 projector = this->result; 1987 } 1988 1989 /* Storage for our result. Ideally for an assignment we'd be using 1990 * the actual storage for the result here, instead. 
1991 */ 1992 result_src = get_temp(glsl_type::vec4_type); 1993 result_dst = dst_reg(result_src); 1994 1995 switch (ir->op) { 1996 case ir_tex: 1997 case ir_txs: 1998 opcode = OPCODE_TEX; 1999 break; 2000 case ir_txb: 2001 opcode = OPCODE_TXB; 2002 ir->lod_info.bias->accept(this); 2003 lod_info = this->result; 2004 break; 2005 case ir_txf: 2006 /* Pretend to be TXL so the sampler, coordinate, lod are available */ 2007 case ir_txl: 2008 opcode = OPCODE_TXL; 2009 ir->lod_info.lod->accept(this); 2010 lod_info = this->result; 2011 break; 2012 case ir_txd: 2013 opcode = OPCODE_TXD; 2014 ir->lod_info.grad.dPdx->accept(this); 2015 dx = this->result; 2016 ir->lod_info.grad.dPdy->accept(this); 2017 dy = this->result; 2018 break; 2019 case ir_txf_ms: 2020 assert(!"Unexpected ir_txf_ms opcode"); 2021 break; 2022 case ir_lod: 2023 assert(!"Unexpected ir_lod opcode"); 2024 break; 2025 case ir_tg4: 2026 assert(!"Unexpected ir_tg4 opcode"); 2027 break; 2028 case ir_query_levels: 2029 assert(!"Unexpected ir_query_levels opcode"); 2030 break; 2031 case ir_samples_identical: 2032 unreachable("Unexpected ir_samples_identical opcode"); 2033 case ir_texture_samples: 2034 unreachable("Unexpected ir_texture_samples opcode"); 2035 } 2036 2037 const glsl_type *sampler_type = ir->sampler->type; 2038 2039 if (ir->projector) { 2040 if (opcode == OPCODE_TEX) { 2041 /* Slot the projector in as the last component of the coord. */ 2042 coord_dst.writemask = WRITEMASK_W; 2043 emit(ir, OPCODE_MOV, coord_dst, projector); 2044 coord_dst.writemask = WRITEMASK_XYZW; 2045 opcode = OPCODE_TXP; 2046 } else { 2047 src_reg coord_w = coord; 2048 coord_w.swizzle = SWIZZLE_WWWW; 2049 2050 /* For the other TEX opcodes there's no projective version 2051 * since the last slot is taken up by lod info. Do the 2052 * projective divide now. 
2053 */ 2054 coord_dst.writemask = WRITEMASK_W; 2055 emit(ir, OPCODE_RCP, coord_dst, projector); 2056 2057 /* In the case where we have to project the coordinates "by hand," 2058 * the shadow comparator value must also be projected. 2059 */ 2060 src_reg tmp_src = coord; 2061 if (ir->shadow_comparator) { 2062 /* Slot the shadow value in as the second to last component of the 2063 * coord. 2064 */ 2065 ir->shadow_comparator->accept(this); 2066 2067 tmp_src = get_temp(glsl_type::vec4_type); 2068 dst_reg tmp_dst = dst_reg(tmp_src); 2069 2070 /* Projective division not allowed for array samplers. */ 2071 assert(!sampler_type->sampler_array); 2072 2073 tmp_dst.writemask = WRITEMASK_Z; 2074 emit(ir, OPCODE_MOV, tmp_dst, this->result); 2075 2076 tmp_dst.writemask = WRITEMASK_XY; 2077 emit(ir, OPCODE_MOV, tmp_dst, coord); 2078 } 2079 2080 coord_dst.writemask = WRITEMASK_XYZ; 2081 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); 2082 2083 coord_dst.writemask = WRITEMASK_XYZW; 2084 coord.swizzle = SWIZZLE_XYZW; 2085 } 2086 } 2087 2088 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow 2089 * comparator was put in the correct place (and projected) by the code, 2090 * above, that handles by-hand projection. 2091 */ 2092 if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) { 2093 /* Slot the shadow value in as the second to last component of the 2094 * coord. 2095 */ 2096 ir->shadow_comparator->accept(this); 2097 2098 /* XXX This will need to be updated for cubemap array samplers. */ 2099 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2100 sampler_type->sampler_array) { 2101 coord_dst.writemask = WRITEMASK_W; 2102 } else { 2103 coord_dst.writemask = WRITEMASK_Z; 2104 } 2105 2106 emit(ir, OPCODE_MOV, coord_dst, this->result); 2107 coord_dst.writemask = WRITEMASK_XYZW; 2108 } 2109 2110 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { 2111 /* Mesa IR stores lod or lod bias in the last channel of the coords. 
*/
      coord_dst.writemask = WRITEMASK_W;
      emit(ir, OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   /* TXD is emitted with the two extra operands dx/dy; every other opcode
    * only gets the coordinate.
    */
   if (opcode == OPCODE_TXD)
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
   else
      inst = emit(ir, opcode, result_dst, coord);

   if (ir->shadow_comparator)
      inst->tex_shadow = GL_TRUE;

   inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
                                             prog);

   /* Translate the GLSL sampler dimensionality (plus array-ness for 1D and
    * 2D) into the Mesa texture target index stored on the instruction.
    * Buffer samplers only assert; no tex_target is assigned for them here.
    */
   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      assert(!"FINISHME: Implement ARB_texture_buffer_object");
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   this->result = result_src;
}

/**
 * Emit OPCODE_RET for an IR return statement.
 *
 * Asserts that the return carries no value.
 */
void
ir_to_mesa_visitor::visit(ir_return *ir)
{
   /* Non-void functions should have been inlined.  We may still emit RETs
    * from main() unless the EmitNoMainReturn option is set.
    */
   assert(!ir->get_value());
   emit(ir, OPCODE_RET);
}

/**
 * Emit OPCODE_KIL for an IR discard.
 *
 * A discard without a condition is first given a constant-true condition
 * (stored back into the IR node), so both forms take the same path.
 */
void
ir_to_mesa_visitor::visit(ir_discard *ir)
{
   if (!ir->condition)
      ir->condition = new(mem_ctx) ir_constant(true);

   ir->condition->accept(this);
   /* Flip every bit of the negate mask of the visited condition before it
    * is used as the KIL source.  NOTE(review): presumably because KIL
    * triggers on negative components -- confirm against the OPCODE_KIL
    * definition.
    */
   this->result.negate = ~this->result.negate;
   emit(ir, OPCODE_KIL, undef_dst, this->result);
}

/**
 * Emit an IF / [ELSE] / ENDIF sequence for an IR if-statement.
 *
 * The condition is visited first and must produce a defined result.  The
 * ELSE opcode and the else-list are only emitted when the else-list is
 * non-empty.
 */
void
ir_to_mesa_visitor::visit(ir_if *ir)
{
   ir_to_mesa_instruction *if_inst;

   ir->condition->accept(this);
   assert(this->result.file != PROGRAM_UNDEFINED);

   if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);

   /* NOTE(review): emit() is defined outside this chunk.  If it already
    * appends the new instruction to this->instructions, this push_tail()
    * re-links a node that is already the list tail -- confirm it is needed.
    */
   this->instructions.push_tail(if_inst);

   visit_exec_list(&ir->then_instructions, this);

   if (!ir->else_instructions.is_empty()) {
      emit(ir->condition, OPCODE_ELSE);
      visit_exec_list(&ir->else_instructions, this);
   }

   emit(ir->condition, OPCODE_ENDIF);
}

/** Geometry-shader IR is not handled by this backend; only asserts. */
void
ir_to_mesa_visitor::visit(ir_emit_vertex *)
{
   assert(!"Geometry shaders not supported.");
}

/** Geometry-shader IR is not handled by this backend; only asserts. */
void
ir_to_mesa_visitor::visit(ir_end_primitive *)
{
   assert(!"Geometry shaders not supported.");
}

/** GLSL barrier() is not handled by this backend; marked unreachable. */
void
ir_to_mesa_visitor::visit(ir_barrier *)
{
   unreachable("GLSL barrier() not supported.");
}

/**
 * Set up an empty visitor.
 *
 * The temporary and signature counters start at 1, and a fresh ralloc
 * context is created for the visitor's allocations (released again in the
 * destructor).
 */
ir_to_mesa_visitor::ir_to_mesa_visitor()
{
   result.file = PROGRAM_UNDEFINED;
   next_temp = 1;
   next_signature_id = 1;
   current_function = NULL;
   mem_ctx = ralloc_context(NULL);
}

/** Release the ralloc context created by the constructor. */
ir_to_mesa_visitor::~ir_to_mesa_visitor()
{
   ralloc_free(mem_ctx);
}

/**
 * Convert a visitor-level src_reg into a Mesa prog_src_register.
 *
 * The relative-address pointer collapses to a flag (RelAddr is set when
 * reg.reladdr is non-NULL); the register index is asserted to fit in
 * INST_INDEX_BITS bits.
 */
static struct prog_src_register
mesa_src_reg_from_ir_src_reg(src_reg reg)
{
   struct prog_src_register mesa_reg;

   mesa_reg.File = reg.file;
   assert(reg.index < (1 << INST_INDEX_BITS));
   mesa_reg.Index = reg.index;
   mesa_reg.Swizzle = reg.swizzle;
   mesa_reg.RelAddr = reg.reladdr != NULL;
   mesa_reg.Negate = reg.negate;

   return mesa_reg;
}

/**
 * Fill in BranchTarget for the control-flow instructions of a program.
 *
 * \param v                  Visitor that produced the program; supplies the
 *                           ralloc context for the scratch stacks and the
 *                           list of function signatures used for CAL.
 * \param mesa_instructions  Instruction array, modified in place.
 * \param num_instructions   Number of entries in \c mesa_instructions.
 *
 * The first pass counts IF and BGNLOOP opcodes (to size the two stacks) and
 * marks every BRK/CONT as unresolved (-1).  The second pass resolves targets
 * using a stack of open IF/ELSE instructions and a stack of open loops.
 */
static void
set_branchtargets(ir_to_mesa_visitor *v,
                  struct prog_instruction *mesa_instructions,
                  int num_instructions)
{
   int if_count = 0, loop_count = 0;
   int *if_stack, *loop_stack;
   int if_stack_pos = 0, loop_stack_pos = 0;
   int i, j;

   for (i = 0; i < num_instructions; i++) {
      switch (mesa_instructions[i].Opcode) {
      case OPCODE_IF:
         if_count++;
         break;
      case OPCODE_BGNLOOP:
         loop_count++;
         break;
      case OPCODE_BRK:
      case OPCODE_CONT:
         mesa_instructions[i].BranchTarget = -1;
         break;
      default:
         break;
      }
   }

   /* Scratch stacks live on the visitor's ralloc context; they are not
    * freed explicitly in this function.
    */
   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);

   for (i = 0; i < num_instructions; i++) {
      switch (mesa_instructions[i].Opcode) {
      case OPCODE_IF:
         if_stack[if_stack_pos] = i;
         if_stack_pos++;
         break;
      case OPCODE_ELSE:
         /* The pending IF jumps to this ELSE; the ELSE then takes the IF's
          * place on the stack so that the matching ENDIF resolves it.
          */
         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
         if_stack[if_stack_pos - 1] = i;
         break;
      case OPCODE_ENDIF:
         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
         if_stack_pos--;
         break;
      case OPCODE_BGNLOOP:
         loop_stack[loop_stack_pos] = i;
         loop_stack_pos++;
         break;
      case OPCODE_ENDLOOP:
         loop_stack_pos--;
         /* Rewrite any breaks/conts at this nesting level (haven't
          * already had a BranchTarget assigned) to point to the end
          * of the loop.
          */
         for (j = loop_stack[loop_stack_pos]; j < i; j++) {
            if (mesa_instructions[j].Opcode == OPCODE_BRK ||
                mesa_instructions[j].Opcode == OPCODE_CONT) {
               if (mesa_instructions[j].BranchTarget == -1) {
                  mesa_instructions[j].BranchTarget = i;
               }
            }
         }
         /* The loop ends point at each other. */
         mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
         mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
         break;
      case OPCODE_CAL:
         /* On entry BranchTarget holds a signature id; replace it with the
          * inst value of the matching function entry.  If no entry matches,
          * the value is left untouched.
          */
         foreach_in_list(function_entry, entry, &v->function_signatures) {
            if (entry->sig_id == mesa_instructions[i].BranchTarget) {
               mesa_instructions[i].BranchTarget = entry->inst;
               break;
            }
         }
         break;
      default:
         break;
      }
   }
}

/**
 * Dump a Mesa instruction array, interleaved with the GLSL IR each
 * instruction was generated from.
 *
 * \param mesa_instructions            Instructions to print.
 * \param mesa_instruction_annotation  Per-instruction IR node (may contain
 *                                     NULL entries), indexed like
 *                                     \c mesa_instructions.
 * \param num_instructions             Number of entries in both arrays.
 *
 * The fprintf()/printf() calls in this function all target stdout.  The IR
 * node is only printed when it differs from the one printed for the previous
 * instruction.
 */
static void
print_program(struct prog_instruction *mesa_instructions,
              ir_instruction **mesa_instruction_annotation,
              int num_instructions)
{
   ir_instruction *last_ir = NULL;
   int i;
   int indent = 0;

   for (i = 0; i < num_instructions; i++) {
      struct prog_instruction *mesa_inst = mesa_instructions + i;
      ir_instruction *ir = mesa_instruction_annotation[i];

      fprintf(stdout, "%3d: ", i);

      if (last_ir != ir && ir) {
         int j;

         for (j = 0; j < indent; j++) {
            fprintf(stdout, " ");
         }
         ir->print();
         printf("\n");
         last_ir = ir;

         fprintf(stdout, " "); /* line number spacing. */
      }

      /* The printer returns the indentation to use for the next call. */
      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
                                            PROG_PRINT_DEBUG, NULL);
   }
}

namespace {

/**
 * Resource visitor that adds the fields of a uniform variable to a
 * gl_program_parameter_list and records the index of the first field in
 * the variable's data.param_index.
 */
class add_uniform_to_shader : public program_resource_visitor {
public:
   add_uniform_to_shader(struct gl_shader_program *shader_program,
                         struct gl_program_parameter_list *params,
                         gl_shader_stage shader_type)
      : shader_program(shader_program), params(params), idx(-1),
        shader_type(shader_type)
   {
      /* empty */
   }

   /**
    * Walk \c var through the base-class visitor and store the resulting
    * index (still -1 if visit_field() never set one) in
    * var->data.param_index.
    */
   void process(ir_variable *var)
   {
      this->idx = -1;
      this->program_resource_visitor::process(var);
      var->data.param_index = this->idx;
   }

private:
   virtual void visit_field(const glsl_type *type, const char *name,
                            bool row_major, const glsl_type *record_type,
                            const enum glsl_interface_packing packing,
                            bool last_field);

   struct gl_shader_program *shader_program;
   struct gl_program_parameter_list *params;
   int idx;                      /**< index of the first field seen, or -1 */
   gl_shader_stage shader_type;
};

} /* anonymous namespace */

/**
 * Add one uniform field to the parameter list, unless a parameter with the
 * same name is already present.
 *
 * Types containing atomics are skipped entirely.  For a newly added sampler
 * parameter, the first component of each of its size / 4 parameter slots is
 * set to the sampler's opaque index for this stage plus the slot offset.
 */
void
add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
                                   bool /* row_major */,
                                   const glsl_type * /* record_type */,
                                   const enum glsl_interface_packing,
                                   bool /* last_field */)
{
   unsigned int size;

   /* atomics don't get real storage */
   if (type->contains_atomic())
      return;

   /* Scalars and vectors: component count, doubled for 64-bit types.
    * Everything else: type_size() (defined outside this chunk) times 4.
    */
   if (type->is_vector() || type->is_scalar()) {
      size = type->vector_elements;
      if (type->is_64bit())
         size *= 2;
   } else {
      size = type_size(type) * 4;
   }

   gl_register_file file;
   if (type->without_array()->is_sampler()) {
      file = PROGRAM_SAMPLER;
   } else {
      file = PROGRAM_UNIFORM;
   }

   int index = _mesa_lookup_parameter_index(params, name);
   if (index < 0) {
      index = _mesa_add_parameter(params, file, name, size, type->gl_type,
                                  NULL, NULL);

      /* Sampler uniform values are stored in prog->SamplerUnits,
       * and the entry in that array is selected by this index we
       * store in ParameterValues[].
       */
      if (file == PROGRAM_SAMPLER) {
         unsigned location;
         const bool found =
            this->shader_program->UniformHash->get(location,
                                                   params->Parameters[index].Name);
         assert(found);

         /* Release builds (assert compiled out) bail out quietly; note that
          * this->idx is then not updated for this field.
          */
         if (!found)
            return;

         struct gl_uniform_storage *storage =
            &this->shader_program->data->UniformStorage[location];

         assert(storage->type->is_sampler() &&
                storage->opaque[shader_type].active);

         for (unsigned int j = 0; j < size / 4; j++)
            params->ParameterValues[index + j][0].f =
               storage->opaque[shader_type].index + j;
      }
   }

   /* The first part of the uniform that's processed determines the base
    * location of the whole uniform (for structures).
    */
   if (this->idx < 0)
      this->idx = index;
}

/**
 * Generate the program parameters list for the user uniforms in a shader
 *
 * \param shader_program Linked shader program.  It is handed to the
 *                       add_uniform_to_shader visitor, which reads its
 *                       UniformHash and UniformStorage for sampler uniforms.
 * \param sh             Shader whose uniforms are to be processed.
 * \param params         Parameter list to be filled in.
2469 */ 2470void 2471_mesa_generate_parameters_list_for_uniforms(struct gl_shader_program 2472 *shader_program, 2473 struct gl_linked_shader *sh, 2474 struct gl_program_parameter_list 2475 *params) 2476{ 2477 add_uniform_to_shader add(shader_program, params, sh->Stage); 2478 2479 foreach_in_list(ir_instruction, node, sh->ir) { 2480 ir_variable *var = node->as_variable(); 2481 2482 if ((var == NULL) || (var->data.mode != ir_var_uniform) 2483 || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) 2484 continue; 2485 2486 add.process(var); 2487 } 2488} 2489 2490void 2491_mesa_associate_uniform_storage(struct gl_context *ctx, 2492 struct gl_shader_program *shader_program, 2493 struct gl_program_parameter_list *params) 2494{ 2495 /* After adding each uniform to the parameter list, connect the storage for 2496 * the parameter with the tracking structure used by the API for the 2497 * uniform. 2498 */ 2499 unsigned last_location = unsigned(~0); 2500 for (unsigned i = 0; i < params->NumParameters; i++) { 2501 if (params->Parameters[i].Type != PROGRAM_UNIFORM) 2502 continue; 2503 2504 unsigned location; 2505 const bool found = 2506 shader_program->UniformHash->get(location, params->Parameters[i].Name); 2507 assert(found); 2508 2509 if (!found) 2510 continue; 2511 2512 struct gl_uniform_storage *storage = 2513 &shader_program->data->UniformStorage[location]; 2514 2515 /* Do not associate any uniform storage to built-in uniforms */ 2516 if (storage->builtin) 2517 continue; 2518 2519 if (location != last_location) { 2520 enum gl_uniform_driver_format format = uniform_native; 2521 2522 unsigned columns = 0; 2523 int dmul = 4 * sizeof(float); 2524 switch (storage->type->base_type) { 2525 case GLSL_TYPE_UINT: 2526 assert(ctx->Const.NativeIntegers); 2527 format = uniform_native; 2528 columns = 1; 2529 break; 2530 case GLSL_TYPE_INT: 2531 format = 2532 (ctx->Const.NativeIntegers) ? 
uniform_native : uniform_int_float; 2533 columns = 1; 2534 break; 2535 2536 case GLSL_TYPE_DOUBLE: 2537 if (storage->type->vector_elements > 2) 2538 dmul *= 2; 2539 /* fallthrough */ 2540 case GLSL_TYPE_FLOAT: 2541 format = uniform_native; 2542 columns = storage->type->matrix_columns; 2543 break; 2544 case GLSL_TYPE_BOOL: 2545 format = uniform_native; 2546 columns = 1; 2547 break; 2548 case GLSL_TYPE_SAMPLER: 2549 case GLSL_TYPE_IMAGE: 2550 case GLSL_TYPE_SUBROUTINE: 2551 format = uniform_native; 2552 columns = 1; 2553 break; 2554 case GLSL_TYPE_ATOMIC_UINT: 2555 case GLSL_TYPE_ARRAY: 2556 case GLSL_TYPE_VOID: 2557 case GLSL_TYPE_STRUCT: 2558 case GLSL_TYPE_ERROR: 2559 case GLSL_TYPE_INTERFACE: 2560 case GLSL_TYPE_FUNCTION: 2561 assert(!"Should not get here."); 2562 break; 2563 } 2564 2565 _mesa_uniform_attach_driver_storage(storage, 2566 dmul * columns, 2567 dmul, 2568 format, 2569 ¶ms->ParameterValues[i]); 2570 2571 /* After attaching the driver's storage to the uniform, propagate any 2572 * data from the linker's backing store. This will cause values from 2573 * initializers in the source code to be copied over. 2574 */ 2575 _mesa_propagate_uniforms_to_driver_storage(storage, 2576 0, 2577 MAX2(1, storage->array_elements)); 2578 2579 last_location = location; 2580 } 2581 } 2582} 2583 2584/* 2585 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 2586 * channels for copy propagation and updates following instructions to 2587 * use the original versions. 2588 * 2589 * The ir_to_mesa_visitor lazily produces code assuming that this pass 2590 * will occur. 
As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 */
void
ir_to_mesa_visitor::copy_propagate(void)
{
   /* acp[4 * reg + chan] is the MOV instruction that most recently wrote
    * channel 'chan' of temporary 'reg' as a plain copy, or NULL when no such
    * copy is currently usable.  acp_level[] remembers the IF nesting depth
    * at which that entry was recorded.
    */
   ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
                                                ir_to_mesa_instruction *,
                                                this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         ir_to_mesa_instruction *first = NULL;
         bool good = true;
         /* Computed before the file check below, but only used to index
          * acp[] once the source is known to be a non-relative temporary.
          */
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* Only file and index of the MOV sources are compared; the
                * per-channel swizzles are composed further down.
                */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose the swizzles: for each of the four components, take
             * the channel this instruction read from the temporary and look
             * up which channel the recorded MOV read for it.  Each component
             * occupies 3 bits of the result.  inst->src[r].swizzle still
             * holds the old value here; it is overwritten after the loop.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      switch (inst->op) {
      case OPCODE_BGNLOOP:
      case OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case OPCODE_IF:
         ++level;
         break;

      case OPCODE_ENDIF:
      case OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same nesting depth; only ENDIF leaves it. */
         if (inst->op == OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src. */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  /* Channel of the MOV's source that feeds channel c. */
                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP. */
      /* Only plain MOVs into a non-relative temporary qualify: the source
       * must be a different register, with no saturate, no negate and no
       * relative addressing.
       */
      if (inst->op == OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !(inst->dst.file == inst->src[0].file &&
            inst->dst.index == inst->src[0].index) &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}


/**
 * Convert a shader's GLSL IR into a Mesa gl_program.
*/
static struct gl_program *
get_mesa_program(struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
                 struct gl_linked_shader *shader)
{
   /* Returns shader->Program on success.  On failure (LinkStatus found
    * cleared at one of the checks below) the reference held in
    * shader->Program is dropped and NULL is returned.
    */
   ir_to_mesa_visitor v;
   struct prog_instruction *mesa_instructions, *mesa_inst;
   ir_instruction **mesa_instruction_annotation;
   int i;
   struct gl_program *prog;
   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
   const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
   struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[shader->Stage];

   validate_ir_tree(shader->ir);

   prog = shader->Program;
   prog->Parameters = _mesa_new_parameter_list();
   v.ctx = ctx;
   v.prog = prog;
   v.shader_program = shader_program;
   v.options = options;

   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                               prog->Parameters);

   /* Emit Mesa IR for main(). */
   visit_exec_list(shader->ir, &v);
   v.emit(NULL, OPCODE_END);

   prog->arb.NumTemporaries = v.next_temp;

   unsigned num_instructions = v.instructions.length();

   /* The instruction array is allocated with prog as its ralloc context
    * (it is handed over to prog further down); the annotation array uses
    * the visitor's context instead.
    */
   mesa_instructions = rzalloc_array(prog, struct prog_instruction,
                                     num_instructions);
   mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
                                              num_instructions);

   v.copy_propagate();

   /* Convert ir_mesa_instructions into prog_instructions.
    */
   mesa_inst = mesa_instructions;
   i = 0;
   foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
      mesa_inst->Opcode = inst->op;
      if (inst->saturate)
         mesa_inst->Saturate = GL_TRUE;
      mesa_inst->DstReg.File = inst->dst.file;
      mesa_inst->DstReg.Index = inst->dst.index;
      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
      mesa_inst->TexSrcUnit = inst->sampler;
      mesa_inst->TexSrcTarget = inst->tex_target;
      mesa_inst->TexShadow = inst->tex_shadow;
      mesa_instruction_annotation[i] = inst->ir;

      /* Set IndirectRegisterFiles. */
      if (mesa_inst->DstReg.RelAddr)
         prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;

      /* Update program's bitmask of indirectly accessed register files */
      for (unsigned src = 0; src < 3; src++)
         if (mesa_inst->SrcReg[src].RelAddr)
            prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;

      /* Warn about control flow that the compiler options say should have
       * been lowered away, and note the use of the address register.
       */
      switch (mesa_inst->Opcode) {
      case OPCODE_IF:
         if (options->MaxIfDepth == 0) {
            linker_warning(shader_program,
                           "Couldn't flatten if-statement. "
                           "This will likely result in software "
                           "rasterization.\n");
         }
         break;
      case OPCODE_BGNLOOP:
         if (options->EmitNoLoops) {
            linker_warning(shader_program,
                           "Couldn't unroll loop. "
                           "This will likely result in software "
                           "rasterization.\n");
         }
         break;
      case OPCODE_CONT:
         if (options->EmitNoCont) {
            linker_warning(shader_program,
                           "Couldn't lower continue-statement. "
                           "This will likely result in software "
                           "rasterization.\n");
         }
         break;
      case OPCODE_ARL:
         prog->arb.NumAddressRegs = 1;
         break;
      default:
         break;
      }

      mesa_inst++;
      i++;

      if (!shader_program->data->LinkStatus)
         break;
   }

   if (!shader_program->data->LinkStatus) {
      goto fail_exit;
   }

   set_branchtargets(&v, mesa_instructions, num_instructions);

   /* NOTE(review): the headers below go to stderr while print_program()
    * writes to stdout, and only stderr is flushed afterwards -- confirm
    * this split is intended.
    */
   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
              shader_program->Name);
      _mesa_print_ir(stderr, shader->ir, NULL);
      fprintf(stderr, "\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
              shader_program->Name);
      print_program(mesa_instructions, mesa_instruction_annotation,
                    num_instructions);
      fflush(stderr);
   }

   prog->arb.Instructions = mesa_instructions;
   prog->arb.NumInstructions = num_instructions;

   /* Setting this to NULL prevents a possible double free in the fail_exit
    * path (far below).
    */
   mesa_instructions = NULL;

   do_set_program_inouts(shader->ir, prog, shader->Stage);

   prog->ShadowSamplers = shader->shadow_samplers;
   prog->ExternalSamplersUsed = gl_external_samplers(prog);
   _mesa_update_shader_textures_used(shader_program, prog);

   /* Set the gl_FragDepth layout. */
   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      prog->info.fs.depth_layout = shader_program->FragDepthLayout;
   }

   if ((ctx->_Shader->Flags & GLSL_NO_OPT) == 0) {
      _mesa_optimize_program(ctx, prog, prog);
   }

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->data->LinkStatus) {
      goto fail_exit;
   }

   return prog;

fail_exit:
   /* mesa_instructions is NULL here if ownership already moved to prog. */
   ralloc_free(mesa_instructions);
   _mesa_reference_program(ctx, &shader->Program, NULL);
   return NULL;
}

extern "C" {

/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into Mesa gl_programs with
 * code lowering and other optimizations.
 *
 * \return GL_FALSE as soon as the driver's ProgramStringNotify hook rejects
 *         a generated program; otherwise the program's LinkStatus.
 */
GLboolean
_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   assert(prog->data->LinkStatus);

   /* Pass 1: run the lowering / optimization passes on every linked stage
    * until none of the tracked passes reports progress.
    */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
         continue;

      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;
      const struct gl_shader_compiler_options *options =
            &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage];

      do {
         progress = false;

         /* Lowering */
         /* The results of these two calls are not folded into 'progress'. */
         do_mat_op_to_vec(ir);
         lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
                                 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));

         /* Each pass is written as "pass() || progress" so that the pass
          * always runs, whatever the earlier passes reported.
          */
         progress = do_common_optimization(ir, true, true,
                                           options, ctx->Const.NativeIntegers)
           || progress;

         progress = lower_quadop_vector(ir, true) || progress;

         if (options->MaxIfDepth == 0)
            progress = lower_discard(ir) || progress;

         progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
                                            options->MaxIfDepth) || progress;

         progress = lower_noise(ir) || progress;

         /* If there are forms of indirect addressing that the driver
          * cannot handle, perform the lowering pass.
          */
         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
           progress =
             lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
                                                 options->EmitNoIndirectInput,
                                                 options->EmitNoIndirectOutput,
                                                 options->EmitNoIndirectTemp,
                                                 options->EmitNoIndirectUniform)
             || progress;

         progress = do_vec_index_to_cond_assign(ir) || progress;
         progress = lower_vector_insert(ir, true) || progress;
      } while (progress);

      validate_ir_tree(ir);
   }

   /* Pass 2: generate a Mesa program for every linked stage and hand it to
    * the driver.  A stage for which get_mesa_program() returns NULL is
    * skipped here.
    */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
         continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]);

         if (!ctx->Driver.ProgramStringNotify(ctx,
                                              _mesa_shader_stage_to_program(i),
                                              linked_prog)) {
            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                    NULL);
            return GL_FALSE;
         }
      }
   }

   build_program_resource_list(ctx, prog);
   return prog->data->LinkStatus;
}

/**
 * Link a GLSL shader program.  Called via glLinkProgram().
3042 */ 3043void 3044_mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 3045{ 3046 unsigned int i; 3047 3048 _mesa_clear_shader_program_data(ctx, prog); 3049 3050 prog->data->LinkStatus = GL_TRUE; 3051 3052 for (i = 0; i < prog->NumShaders; i++) { 3053 if (!prog->Shaders[i]->CompileStatus) { 3054 linker_error(prog, "linking with uncompiled shader"); 3055 } 3056 } 3057 3058 if (prog->data->LinkStatus) { 3059 link_shaders(ctx, prog); 3060 } 3061 3062 if (prog->data->LinkStatus) { 3063 if (!ctx->Driver.LinkShader(ctx, prog)) { 3064 prog->data->LinkStatus = GL_FALSE; 3065 } 3066 } 3067 3068 if (ctx->_Shader->Flags & GLSL_DUMP) { 3069 if (!prog->data->LinkStatus) { 3070 fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name); 3071 } 3072 3073 if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) { 3074 fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name); 3075 fprintf(stderr, "%s\n", prog->data->InfoLog); 3076 } 3077 } 3078} 3079 3080} /* extern "C" */ 3081