st_glsl_to_tgsi.cpp revision 71cbc9e3c4c9ef6090ee31e87601ae64af26321e
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45extern "C" { 46#include "main/mtypes.h" 47#include "main/shaderapi.h" 48#include "main/shaderobj.h" 49#include "main/uniforms.h" 50#include "program/hash_table.h" 51#include "program/prog_instruction.h" 52#include "program/prog_optimize.h" 53#include "program/prog_print.h" 54#include "program/program.h" 55#include "program/prog_uniform.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 74 (1 << PROGRAM_ENV_PARAM) | \ 75 (1 << PROGRAM_STATE_VAR) | \ 76 (1 << PROGRAM_NAMED_PARAM) | \ 77 (1 << PROGRAM_CONSTANT) | \ 78 (1 << PROGRAM_UNIFORM)) 79 80class st_src_reg; 81class st_dst_reg; 82 83static int swizzle_for_size(int size); 84 85/** 86 * This struct is a corresponding struct to TGSI ureg_src. 87 */ 88class st_src_reg { 89public: 90 st_src_reg(gl_register_file file, int index, const glsl_type *type) 91 { 92 this->file = file; 93 this->index = index; 94 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 95 this->swizzle = swizzle_for_size(type->vector_elements); 96 else 97 this->swizzle = SWIZZLE_XYZW; 98 this->negate = 0; 99 this->type = type ? type->base_type : GLSL_TYPE_ERROR; 100 this->reladdr = NULL; 101 } 102 103 st_src_reg(gl_register_file file, int index, int type) 104 { 105 this->type = type; 106 this->file = file; 107 this->index = index; 108 this->swizzle = SWIZZLE_XYZW; 109 this->negate = 0; 110 this->reladdr = NULL; 111 } 112 113 st_src_reg() 114 { 115 this->type = GLSL_TYPE_ERROR; 116 this->file = PROGRAM_UNDEFINED; 117 this->index = 0; 118 this->swizzle = 0; 119 this->negate = 0; 120 this->reladdr = NULL; 121 } 122 123 explicit st_src_reg(st_dst_reg reg); 124 125 gl_register_file file; /**< PROGRAM_* from Mesa */ 126 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 127 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 128 int negate; /**< NEGATE_XYZW mask from mesa */ 129 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 130 /** Register index should be offset by the integer in this reg. */ 131 st_src_reg *reladdr; 132}; 133 134class st_dst_reg { 135public: 136 st_dst_reg(gl_register_file file, int writemask, int type) 137 { 138 this->file = file; 139 this->index = 0; 140 this->writemask = writemask; 141 this->cond_mask = COND_TR; 142 this->reladdr = NULL; 143 this->type = type; 144 } 145 146 st_dst_reg() 147 { 148 this->type = GLSL_TYPE_ERROR; 149 this->file = PROGRAM_UNDEFINED; 150 this->index = 0; 151 this->writemask = 0; 152 this->cond_mask = COND_TR; 153 this->reladdr = NULL; 154 } 155 156 explicit st_dst_reg(st_src_reg reg); 157 158 gl_register_file file; /**< PROGRAM_* from Mesa */ 159 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 160 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 161 GLuint cond_mask:4; 162 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 163 /** Register index should be offset by the integer in this reg. */ 164 st_src_reg *reladdr; 165}; 166 167st_src_reg::st_src_reg(st_dst_reg reg) 168{ 169 this->type = reg.type; 170 this->file = reg.file; 171 this->index = reg.index; 172 this->swizzle = SWIZZLE_XYZW; 173 this->negate = 0; 174 this->reladdr = NULL; 175} 176 177st_dst_reg::st_dst_reg(st_src_reg reg) 178{ 179 this->type = reg.type; 180 this->file = reg.file; 181 this->index = reg.index; 182 this->writemask = WRITEMASK_XYZW; 183 this->cond_mask = COND_TR; 184 this->reladdr = reg.reladdr; 185} 186 187class glsl_to_tgsi_instruction : public exec_node { 188public: 189 /* Callers of this ralloc-based new need not call delete. It's 190 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 191 static void* operator new(size_t size, void *ctx) 192 { 193 void *node; 194 195 node = rzalloc_size(ctx, size); 196 assert(node != NULL); 197 198 return node; 199 } 200 201 unsigned op; 202 st_dst_reg dst; 203 st_src_reg src[3]; 204 /** Pointer to the ir source this tree came from for debugging */ 205 ir_instruction *ir; 206 GLboolean cond_update; 207 bool saturate; 208 int sampler; /**< sampler index */ 209 int tex_target; /**< One of TEXTURE_*_INDEX */ 210 GLboolean tex_shadow; 211 int dead_mask; /**< Used in dead code elimination */ 212 213 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 214}; 215 216class variable_storage : public exec_node { 217public: 218 variable_storage(ir_variable *var, gl_register_file file, int index) 219 : file(file), index(index), var(var) 220 { 221 /* empty */ 222 } 223 224 gl_register_file file; 225 int index; 226 ir_variable *var; /* variable that maps to this, if any */ 227}; 228 229class function_entry : public exec_node { 230public: 231 ir_function_signature *sig; 232 233 /** 234 * identifier of this function signature used by the program. 235 * 236 * At the point that Mesa instructions for function calls are 237 * generated, we don't know the address of the first instruction of 238 * the function body. So we make the BranchTarget that is called a 239 * small integer and rewrite them during set_branchtargets(). 240 */ 241 int sig_id; 242 243 /** 244 * Pointer to first instruction of the function body. 245 * 246 * Set during function body emits after main() is processed. 247 */ 248 glsl_to_tgsi_instruction *bgn_inst; 249 250 /** 251 * Index of the first instruction of the function body in actual 252 * Mesa IR. 253 * 254 * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. 255 */ 256 int inst; 257 258 /** Storage for the return value. */ 259 st_src_reg return_reg; 260}; 261 262class glsl_to_tgsi_visitor : public ir_visitor { 263public: 264 glsl_to_tgsi_visitor(); 265 ~glsl_to_tgsi_visitor(); 266 267 function_entry *current_function; 268 269 struct gl_context *ctx; 270 struct gl_program *prog; 271 struct gl_shader_program *shader_program; 272 struct gl_shader_compiler_options *options; 273 274 int next_temp; 275 276 int num_address_regs; 277 int samplers_used; 278 bool indirect_addr_temps; 279 bool indirect_addr_consts; 280 281 int glsl_version; 282 283 variable_storage *find_variable_storage(ir_variable *var); 284 285 function_entry *get_function_signature(ir_function_signature *sig); 286 287 st_src_reg get_temp(const glsl_type *type); 288 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 289 290 st_src_reg st_src_reg_for_float(float val); 291 st_src_reg st_src_reg_for_int(int val); 292 st_src_reg st_src_reg_for_type(int type, int val); 293 294 /** 295 * \name Visit methods 296 * 297 * As typical for the visitor pattern, there must be one \c visit method for 298 * each concrete subclass of \c ir_instruction. Virtual base classes within 299 * the hierarchy should not have \c visit methods. 300 */ 301 /*@{*/ 302 virtual void visit(ir_variable *); 303 virtual void visit(ir_loop *); 304 virtual void visit(ir_loop_jump *); 305 virtual void visit(ir_function_signature *); 306 virtual void visit(ir_function *); 307 virtual void visit(ir_expression *); 308 virtual void visit(ir_swizzle *); 309 virtual void visit(ir_dereference_variable *); 310 virtual void visit(ir_dereference_array *); 311 virtual void visit(ir_dereference_record *); 312 virtual void visit(ir_assignment *); 313 virtual void visit(ir_constant *); 314 virtual void visit(ir_call *); 315 virtual void visit(ir_return *); 316 virtual void visit(ir_discard *); 317 virtual void visit(ir_texture *); 318 virtual void visit(ir_if *); 319 /*@}*/ 320 321 st_src_reg result; 322 323 /** List of variable_storage */ 324 exec_list variables; 325 326 /** List of function_entry */ 327 exec_list function_signatures; 328 int next_signature_id; 329 330 /** List of glsl_to_tgsi_instruction */ 331 exec_list instructions; 332 333 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 334 335 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 336 st_dst_reg dst, st_src_reg src0); 337 338 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 339 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 340 341 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 342 st_dst_reg dst, 343 st_src_reg src0, st_src_reg src1, st_src_reg src2); 344 345 unsigned get_opcode(ir_instruction *ir, unsigned op, 346 st_dst_reg dst, 347 st_src_reg src0, st_src_reg src1); 348 349 /** 350 * Emit the correct dot-product instruction for the type of arguments 351 */ 352 void emit_dp(ir_instruction *ir, 353 st_dst_reg dst, 354 st_src_reg src0, 355 st_src_reg src1, 356 unsigned elements); 357 358 void emit_scalar(ir_instruction *ir, unsigned op, 359 st_dst_reg dst, st_src_reg src0); 360 361 void emit_scalar(ir_instruction *ir, unsigned op, 362 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 363 364 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 365 366 void emit_scs(ir_instruction *ir, unsigned op, 367 st_dst_reg dst, const st_src_reg &src); 368 369 GLboolean try_emit_mad(ir_expression *ir, 370 int mul_operand); 371 GLboolean try_emit_sat(ir_expression *ir); 372 373 void emit_swz(ir_expression *ir); 374 375 bool process_move_condition(ir_rvalue *ir); 376 377 void remove_output_reads(gl_register_file type); 378 void simplify_cmp(void); 379 380 void rename_temp_register(int index, int new_index); 381 int get_first_temp_read(int index); 382 int get_first_temp_write(int index); 383 int get_last_temp_read(int index); 384 int get_last_temp_write(int index); 385 386 void copy_propagate(void); 387 void eliminate_dead_code(void); 388 int eliminate_dead_code_advanced(void); 389 void merge_registers(void); 390 void renumber_registers(void); 391 392 void *mem_ctx; 393}; 394 395static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 396 397static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 398 399static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 400 401static void 402fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 403 404static void 405fail_link(struct gl_shader_program *prog, const char *fmt, ...) 406{ 407 va_list args; 408 va_start(args, fmt); 409 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 410 va_end(args); 411 412 prog->LinkStatus = GL_FALSE; 413} 414 415static int 416swizzle_for_size(int size) 417{ 418 int size_swizzles[4] = { 419 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 420 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 421 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 422 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 423 }; 424 425 assert((size >= 1) && (size <= 4)); 426 return size_swizzles[size - 1]; 427} 428 429static bool 430is_tex_instruction(unsigned opcode) 431{ 432 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 433 return info->is_tex; 434} 435 436static unsigned 437num_inst_dst_regs(unsigned opcode) 438{ 439 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 440 return info->num_dst; 441} 442 443static unsigned 444num_inst_src_regs(unsigned opcode) 445{ 446 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 447 return info->is_tex ? info->num_src - 1 : info->num_src; 448} 449 450glsl_to_tgsi_instruction * 451glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 452 st_dst_reg dst, 453 st_src_reg src0, st_src_reg src1, st_src_reg src2) 454{ 455 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 456 int num_reladdr = 0, i; 457 458 op = get_opcode(ir, op, dst, src0, src1); 459 460 /* If we have to do relative addressing, we want to load the ARL 461 * reg directly for one of the regs, and preload the other reladdr 462 * sources into temps. 463 */ 464 num_reladdr += dst.reladdr != NULL; 465 num_reladdr += src0.reladdr != NULL; 466 num_reladdr += src1.reladdr != NULL; 467 num_reladdr += src2.reladdr != NULL; 468 469 reladdr_to_temp(ir, &src2, &num_reladdr); 470 reladdr_to_temp(ir, &src1, &num_reladdr); 471 reladdr_to_temp(ir, &src0, &num_reladdr); 472 473 if (dst.reladdr) { 474 emit_arl(ir, address_reg, *dst.reladdr); 475 num_reladdr--; 476 } 477 assert(num_reladdr == 0); 478 479 inst->op = op; 480 inst->dst = dst; 481 inst->src[0] = src0; 482 inst->src[1] = src1; 483 inst->src[2] = src2; 484 inst->ir = ir; 485 inst->dead_mask = 0; 486 487 inst->function = NULL; 488 489 if (op == TGSI_OPCODE_ARL) 490 this->num_address_regs = 1; 491 492 /* Update indirect addressing status used by TGSI */ 493 if (dst.reladdr) { 494 switch(dst.file) { 495 case PROGRAM_TEMPORARY: 496 this->indirect_addr_temps = true; 497 break; 498 case PROGRAM_LOCAL_PARAM: 499 case PROGRAM_ENV_PARAM: 500 case PROGRAM_STATE_VAR: 501 case PROGRAM_NAMED_PARAM: 502 case PROGRAM_CONSTANT: 503 case PROGRAM_UNIFORM: 504 this->indirect_addr_consts = true; 505 break; 506 default: 507 break; 508 } 509 } 510 else { 511 for (i=0; i<3; i++) { 512 if(inst->src[i].reladdr) { 513 switch(inst->src[i].file) { 514 case PROGRAM_TEMPORARY: 515 this->indirect_addr_temps = true; 516 break; 517 case PROGRAM_LOCAL_PARAM: 518 case PROGRAM_ENV_PARAM: 519 case PROGRAM_STATE_VAR: 520 case PROGRAM_NAMED_PARAM: 521 case PROGRAM_CONSTANT: 522 case PROGRAM_UNIFORM: 523 this->indirect_addr_consts = true; 524 break; 525 default: 526 break; 527 } 528 } 529 } 530 } 531 532 this->instructions.push_tail(inst); 533 534 return inst; 535} 536 537 538glsl_to_tgsi_instruction * 539glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 540 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 541{ 542 return emit(ir, op, dst, src0, src1, undef_src); 543} 544 545glsl_to_tgsi_instruction * 546glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 547 st_dst_reg dst, st_src_reg src0) 548{ 549 assert(dst.writemask != 0); 550 return emit(ir, op, dst, src0, undef_src, undef_src); 551} 552 553glsl_to_tgsi_instruction * 554glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 555{ 556 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 557} 558 559/** 560 * Determines whether to use an integer, unsigned integer, or float opcode 561 * based on the operands and input opcode, then emits the result. 562 * 563 * TODO: type checking for remaining TGSI opcodes 564 */ 565unsigned 566glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 567 st_dst_reg dst, 568 st_src_reg src0, st_src_reg src1) 569{ 570 int type = GLSL_TYPE_FLOAT; 571 572 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 573 type = GLSL_TYPE_FLOAT; 574 else if (glsl_version >= 130) 575 type = src0.type; 576 577#define case4(c, f, i, u) \ 578 case TGSI_OPCODE_##c: \ 579 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 580 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 581 else op = TGSI_OPCODE_##f; \ 582 break; 583#define case3(f, i, u) case4(f, f, i, u) 584#define case2fi(f, i) case4(f, f, i, i) 585#define case2iu(i, u) case4(i, LAST, i, u) 586 587 switch(op) { 588 case2fi(ADD, UADD); 589 case2fi(MUL, UMUL); 590 case2fi(MAD, UMAD); 591 case3(DIV, IDIV, UDIV); 592 case3(MAX, IMAX, UMAX); 593 case3(MIN, IMIN, UMIN); 594 case2iu(MOD, UMOD); 595 596 case2fi(SEQ, USEQ); 597 case2fi(SNE, USNE); 598 case3(SGE, ISGE, USGE); 599 case3(SLT, ISLT, USLT); 600 601 case2iu(SHL, SHL); 602 case2iu(ISHR, USHR); 603 case2iu(NOT, NOT); 604 case2iu(AND, AND); 605 case2iu(OR, OR); 606 case2iu(XOR, XOR); 607 608 default: break; 609 } 610 611 assert(op != TGSI_OPCODE_LAST); 612 return op; 613} 614 615void 616glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 617 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 618 unsigned elements) 619{ 620 static const unsigned dot_opcodes[] = { 621 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 622 }; 623 624 emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 625} 626 627/** 628 * Emits TGSI scalar opcodes to produce unique answers across channels. 629 * 630 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 631 * channel determines the result across all channels. So to do a vec4 632 * of this operation, we want to emit a scalar per source channel used 633 * to produce dest channels. 634 */ 635void 636glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 637 st_dst_reg dst, 638 st_src_reg orig_src0, st_src_reg orig_src1) 639{ 640 int i, j; 641 int done_mask = ~dst.writemask; 642 643 /* TGSI RCP is a scalar operation splatting results to all channels, 644 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 645 * dst channels. 646 */ 647 for (i = 0; i < 4; i++) { 648 GLuint this_mask = (1 << i); 649 glsl_to_tgsi_instruction *inst; 650 st_src_reg src0 = orig_src0; 651 st_src_reg src1 = orig_src1; 652 653 if (done_mask & this_mask) 654 continue; 655 656 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 657 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 658 for (j = i + 1; j < 4; j++) { 659 /* If there is another enabled component in the destination that is 660 * derived from the same inputs, generate its value on this pass as 661 * well. 662 */ 663 if (!(done_mask & (1 << j)) && 664 GET_SWZ(src0.swizzle, j) == src0_swiz && 665 GET_SWZ(src1.swizzle, j) == src1_swiz) { 666 this_mask |= (1 << j); 667 } 668 } 669 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 670 src0_swiz, src0_swiz); 671 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 672 src1_swiz, src1_swiz); 673 674 inst = emit(ir, op, dst, src0, src1); 675 inst->dst.writemask = this_mask; 676 done_mask |= this_mask; 677 } 678} 679 680void 681glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 682 st_dst_reg dst, st_src_reg src0) 683{ 684 st_src_reg undef = undef_src; 685 686 undef.swizzle = SWIZZLE_XXXX; 687 688 emit_scalar(ir, op, dst, src0, undef); 689} 690 691void 692glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 693 st_dst_reg dst, st_src_reg src0) 694{ 695 st_src_reg tmp = get_temp(glsl_type::float_type); 696 697 if (src0.type == GLSL_TYPE_INT) 698 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); 699 else if (src0.type == GLSL_TYPE_UINT) 700 emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); 701 else 702 tmp = src0; 703 704 emit(ir, TGSI_OPCODE_ARL, dst, tmp); 705} 706 707/** 708 * Emit an TGSI_OPCODE_SCS instruction 709 * 710 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 711 * Instead of splatting its result across all four components of the 712 * destination, it writes one value to the \c x component and another value to 713 * the \c y component. 714 * 715 * \param ir IR instruction being processed 716 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 717 * on which value is desired. 718 * \param dst Destination register 719 * \param src Source register 720 */ 721void 722glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 723 st_dst_reg dst, 724 const st_src_reg &src) 725{ 726 /* Vertex programs cannot use the SCS opcode. 727 */ 728 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 729 emit_scalar(ir, op, dst, src); 730 return; 731 } 732 733 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 734 const unsigned scs_mask = (1U << component); 735 int done_mask = ~dst.writemask; 736 st_src_reg tmp; 737 738 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 739 740 /* If there are compnents in the destination that differ from the component 741 * that will be written by the SCS instrution, we'll need a temporary. 742 */ 743 if (scs_mask != unsigned(dst.writemask)) { 744 tmp = get_temp(glsl_type::vec4_type); 745 } 746 747 for (unsigned i = 0; i < 4; i++) { 748 unsigned this_mask = (1U << i); 749 st_src_reg src0 = src; 750 751 if ((done_mask & this_mask) != 0) 752 continue; 753 754 /* The source swizzle specified which component of the source generates 755 * sine / cosine for the current component in the destination. The SCS 756 * instruction requires that this value be swizzle to the X component. 757 * Replace the current swizzle with a swizzle that puts the source in 758 * the X component. 759 */ 760 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 761 762 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 763 src0_swiz, src0_swiz); 764 for (unsigned j = i + 1; j < 4; j++) { 765 /* If there is another enabled component in the destination that is 766 * derived from the same inputs, generate its value on this pass as 767 * well. 768 */ 769 if (!(done_mask & (1 << j)) && 770 GET_SWZ(src0.swizzle, j) == src0_swiz) { 771 this_mask |= (1 << j); 772 } 773 } 774 775 if (this_mask != scs_mask) { 776 glsl_to_tgsi_instruction *inst; 777 st_dst_reg tmp_dst = st_dst_reg(tmp); 778 779 /* Emit the SCS instruction. 780 */ 781 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 782 inst->dst.writemask = scs_mask; 783 784 /* Move the result of the SCS instruction to the desired location in 785 * the destination. 786 */ 787 tmp.swizzle = MAKE_SWIZZLE4(component, component, 788 component, component); 789 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 790 inst->dst.writemask = this_mask; 791 } else { 792 /* Emit the SCS instruction to write directly to the destination. 793 */ 794 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 795 inst->dst.writemask = scs_mask; 796 } 797 798 done_mask |= this_mask; 799 } 800} 801 802struct st_src_reg 803glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 804{ 805 st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); 806 union gl_constant_value uval; 807 808 uval.f = val; 809 src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, 810 &uval, 1, GL_FLOAT, &src.swizzle); 811 812 return src; 813} 814 815struct st_src_reg 816glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 817{ 818 st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); 819 union gl_constant_value uval; 820 821 assert(glsl_version >= 130); 822 823 uval.i = val; 824 src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, 825 &uval, 1, GL_INT, &src.swizzle); 826 827 return src; 828} 829 830struct st_src_reg 831glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 832{ 833 if (glsl_version >= 130) 834 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 835 st_src_reg_for_int(val); 836 else 837 return st_src_reg_for_float(val); 838} 839 840static int 841type_size(const struct glsl_type *type) 842{ 843 unsigned int i; 844 int size; 845 846 switch (type->base_type) { 847 case GLSL_TYPE_UINT: 848 case GLSL_TYPE_INT: 849 case GLSL_TYPE_FLOAT: 850 case GLSL_TYPE_BOOL: 851 if (type->is_matrix()) { 852 return type->matrix_columns; 853 } else { 854 /* Regardless of size of vector, it gets a vec4. This is bad 855 * packing for things like floats, but otherwise arrays become a 856 * mess. Hopefully a later pass over the code can pack scalars 857 * down if appropriate. 858 */ 859 return 1; 860 } 861 case GLSL_TYPE_ARRAY: 862 assert(type->length > 0); 863 return type_size(type->fields.array) * type->length; 864 case GLSL_TYPE_STRUCT: 865 size = 0; 866 for (i = 0; i < type->length; i++) { 867 size += type_size(type->fields.structure[i].type); 868 } 869 return size; 870 case GLSL_TYPE_SAMPLER: 871 /* Samplers take up one slot in UNIFORMS[], but they're baked in 872 * at link time. 873 */ 874 return 1; 875 default: 876 assert(0); 877 return 0; 878 } 879} 880 881/** 882 * In the initial pass of codegen, we assign temporary numbers to 883 * intermediate results. (not SSA -- variable assignments will reuse 884 * storage). 885 */ 886st_src_reg 887glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 888{ 889 st_src_reg src; 890 int swizzle[4]; 891 int i; 892 893 src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; 894 src.file = PROGRAM_TEMPORARY; 895 src.index = next_temp; 896 src.reladdr = NULL; 897 next_temp += type_size(type); 898 899 if (type->is_array() || type->is_record()) { 900 src.swizzle = SWIZZLE_NOOP; 901 } else { 902 for (i = 0; i < type->vector_elements; i++) 903 swizzle[i] = i; 904 for (; i < 4; i++) 905 swizzle[i] = type->vector_elements - 1; 906 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], 907 swizzle[2], swizzle[3]); 908 } 909 src.negate = 0; 910 911 return src; 912} 913 914variable_storage * 915glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 916{ 917 918 variable_storage *entry; 919 920 foreach_iter(exec_list_iterator, iter, this->variables) { 921 entry = (variable_storage *)iter.get(); 922 923 if (entry->var == var) 924 return entry; 925 } 926 927 return NULL; 928} 929 930void 931glsl_to_tgsi_visitor::visit(ir_variable *ir) 932{ 933 if (strcmp(ir->name, "gl_FragCoord") == 0) { 934 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 935 936 fp->OriginUpperLeft = ir->origin_upper_left; 937 fp->PixelCenterInteger = ir->pixel_center_integer; 938 939 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 940 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 941 switch (ir->depth_layout) { 942 case ir_depth_layout_none: 943 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; 944 break; 945 case ir_depth_layout_any: 946 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; 947 break; 948 case ir_depth_layout_greater: 949 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; 950 break; 951 case ir_depth_layout_less: 952 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; 953 break; 954 case ir_depth_layout_unchanged: 955 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; 956 break; 957 default: 958 assert(0); 959 break; 960 } 961 } 962 963 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 964 unsigned int i; 965 const ir_state_slot *const slots = ir->state_slots; 966 assert(ir->state_slots != NULL); 967 968 /* Check if this statevar's setup in the STATE file exactly 969 * matches how we'll want to reference it as a 970 * struct/array/whatever. If not, then we need to move it into 971 * temporary storage and hope that it'll get copy-propagated 972 * out. 973 */ 974 for (i = 0; i < ir->num_state_slots; i++) { 975 if (slots[i].swizzle != SWIZZLE_XYZW) { 976 break; 977 } 978 } 979 980 struct variable_storage *storage; 981 st_dst_reg dst; 982 if (i == ir->num_state_slots) { 983 /* We'll set the index later. */ 984 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 985 this->variables.push_tail(storage); 986 987 dst = undef_dst; 988 } else { 989 /* The variable_storage constructor allocates slots based on the size 990 * of the type. However, this had better match the number of state 991 * elements that we're going to copy into the new temporary. 992 */ 993 assert((int) ir->num_state_slots == type_size(ir->type)); 994 995 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 996 this->next_temp); 997 this->variables.push_tail(storage); 998 this->next_temp += type_size(ir->type); 999 1000 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1001 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT)); 1002 } 1003 1004 1005 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1006 int index = _mesa_add_state_reference(this->prog->Parameters, 1007 (gl_state_index *)slots[i].tokens); 1008 1009 if (storage->file == PROGRAM_STATE_VAR) { 1010 if (storage->index == -1) { 1011 storage->index = index; 1012 } else { 1013 assert(index == storage->index + (int)i); 1014 } 1015 } else { 1016 st_src_reg src(PROGRAM_STATE_VAR, index, 1017 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); 1018 src.swizzle = slots[i].swizzle; 1019 emit(ir, TGSI_OPCODE_MOV, dst, src); 1020 /* even a float takes up a whole vec4 reg in a struct/array. */ 1021 dst.index++; 1022 } 1023 } 1024 1025 if (storage->file == PROGRAM_TEMPORARY && 1026 dst.index != storage->index + (int) ir->num_state_slots) { 1027 fail_link(this->shader_program, 1028 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1029 ir->name, dst.index - storage->index, 1030 type_size(ir->type)); 1031 } 1032 } 1033} 1034 1035void 1036glsl_to_tgsi_visitor::visit(ir_loop *ir) 1037{ 1038 ir_dereference_variable *counter = NULL; 1039 1040 if (ir->counter != NULL) 1041 counter = new(ir) ir_dereference_variable(ir->counter); 1042 1043 if (ir->from != NULL) { 1044 assert(ir->counter != NULL); 1045 1046 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1047 1048 a->accept(this); 1049 delete a; 1050 } 1051 1052 emit(NULL, TGSI_OPCODE_BGNLOOP); 1053 1054 if (ir->to) { 1055 ir_expression *e = 1056 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1057 counter, ir->to); 1058 ir_if *if_stmt = new(ir) ir_if(e); 1059 1060 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1061 1062 if_stmt->then_instructions.push_tail(brk); 1063 1064 if_stmt->accept(this); 1065 1066 delete if_stmt; 1067 delete e; 1068 delete brk; 1069 } 1070 1071 visit_exec_list(&ir->body_instructions, this); 1072 1073 if (ir->increment) { 1074 ir_expression *e = 1075 new(ir) ir_expression(ir_binop_add, counter->type, 1076 counter, ir->increment); 1077 1078 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1079 1080 a->accept(this); 1081 delete a; 1082 delete e; 1083 } 1084 1085 emit(NULL, TGSI_OPCODE_ENDLOOP); 1086} 1087 1088void 1089glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1090{ 1091 switch (ir->mode) { 1092 case ir_loop_jump::jump_break: 1093 emit(NULL, TGSI_OPCODE_BRK); 1094 break; 1095 case ir_loop_jump::jump_continue: 1096 emit(NULL, TGSI_OPCODE_CONT); 1097 break; 1098 } 1099} 1100 1101 1102void 1103glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1104{ 1105 assert(0); 1106 (void)ir; 1107} 1108 1109void 1110glsl_to_tgsi_visitor::visit(ir_function *ir) 1111{ 1112 /* Ignore function bodies other than main() -- we shouldn't see calls to 1113 * them since they should all be inlined before we get to glsl_to_tgsi. 1114 */ 1115 if (strcmp(ir->name, "main") == 0) { 1116 const ir_function_signature *sig; 1117 exec_list empty; 1118 1119 sig = ir->matching_signature(&empty); 1120 1121 assert(sig); 1122 1123 foreach_iter(exec_list_iterator, iter, sig->body) { 1124 ir_instruction *ir = (ir_instruction *)iter.get(); 1125 1126 ir->accept(this); 1127 } 1128 } 1129} 1130 1131GLboolean 1132glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1133{ 1134 int nonmul_operand = 1 - mul_operand; 1135 st_src_reg a, b, c; 1136 st_dst_reg result_dst; 1137 1138 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1139 if (!expr || expr->operation != ir_binop_mul) 1140 return false; 1141 1142 expr->operands[0]->accept(this); 1143 a = this->result; 1144 expr->operands[1]->accept(this); 1145 b = this->result; 1146 ir->operands[nonmul_operand]->accept(this); 1147 c = this->result; 1148 1149 this->result = get_temp(ir->type); 1150 result_dst = st_dst_reg(this->result); 1151 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1152 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1153 1154 return true; 1155} 1156 1157GLboolean 1158glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1159{ 1160 /* Saturates were only introduced to vertex programs in 1161 * NV_vertex_program3, so don't give them to drivers in the VP. 1162 */ 1163 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1164 return false; 1165 1166 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1167 if (!sat_src) 1168 return false; 1169 1170 sat_src->accept(this); 1171 st_src_reg src = this->result; 1172 1173 this->result = get_temp(ir->type); 1174 st_dst_reg result_dst = st_dst_reg(this->result); 1175 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1176 glsl_to_tgsi_instruction *inst; 1177 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1178 inst->saturate = true; 1179 1180 return true; 1181} 1182 1183void 1184glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1185 st_src_reg *reg, int *num_reladdr) 1186{ 1187 if (!reg->reladdr) 1188 return; 1189 1190 emit_arl(ir, address_reg, *reg->reladdr); 1191 1192 if (*num_reladdr != 1) { 1193 st_src_reg temp = get_temp(glsl_type::vec4_type); 1194 1195 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1196 *reg = temp; 1197 } 1198 1199 (*num_reladdr)--; 1200} 1201 1202void 1203glsl_to_tgsi_visitor::visit(ir_expression *ir) 1204{ 1205 unsigned int operand; 1206 st_src_reg op[Elements(ir->operands)]; 1207 st_src_reg result_src; 1208 st_dst_reg result_dst; 1209 1210 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1211 */ 1212 if (ir->operation == ir_binop_add) { 1213 if (try_emit_mad(ir, 1)) 1214 return; 1215 if (try_emit_mad(ir, 0)) 1216 return; 1217 } 1218 if (try_emit_sat(ir)) 1219 return; 1220 1221 if (ir->operation == ir_quadop_vector) 1222 assert(!"ir_quadop_vector should have been lowered"); 1223 1224 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1225 this->result.file = PROGRAM_UNDEFINED; 1226 ir->operands[operand]->accept(this); 1227 if (this->result.file == PROGRAM_UNDEFINED) { 1228 ir_print_visitor v; 1229 printf("Failed to get tree for expression operand:\n"); 1230 ir->operands[operand]->accept(&v); 1231 exit(1); 1232 } 1233 op[operand] = this->result; 1234 1235 /* Matrix expression operands should have been broken down to vector 1236 * operations already. 1237 */ 1238 assert(!ir->operands[operand]->type->is_matrix()); 1239 } 1240 1241 int vector_elements = ir->operands[0]->type->vector_elements; 1242 if (ir->operands[1]) { 1243 vector_elements = MAX2(vector_elements, 1244 ir->operands[1]->type->vector_elements); 1245 } 1246 1247 this->result.file = PROGRAM_UNDEFINED; 1248 1249 /* Storage for our result. Ideally for an assignment we'd be using 1250 * the actual storage for the result here, instead. 1251 */ 1252 result_src = get_temp(ir->type); 1253 /* convenience for the emit functions below. */ 1254 result_dst = st_dst_reg(result_src); 1255 /* Limit writes to the channels that will be used by result_src later. 1256 * This does limit this temp's use as a temporary for multi-instruction 1257 * sequences. 1258 */ 1259 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1260 1261 switch (ir->operation) { 1262 case ir_unop_logic_not: 1263 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); 1264 break; 1265 case ir_unop_neg: 1266 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); 1267 if (result_dst.type == GLSL_TYPE_INT) 1268 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1269 else { 1270 op[0].negate = ~op[0].negate; 1271 result_src = op[0]; 1272 } 1273 break; 1274 case ir_unop_abs: 1275 assert(result_dst.type == GLSL_TYPE_FLOAT); 1276 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1277 break; 1278 case ir_unop_sign: 1279 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1280 break; 1281 case ir_unop_rcp: 1282 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1283 break; 1284 1285 case ir_unop_exp2: 1286 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1287 break; 1288 case ir_unop_exp: 1289 case ir_unop_log: 1290 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1291 break; 1292 case ir_unop_log2: 1293 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1294 break; 1295 case ir_unop_sin: 1296 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1297 break; 1298 case ir_unop_cos: 1299 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1300 break; 1301 case ir_unop_sin_reduced: 1302 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1303 break; 1304 case ir_unop_cos_reduced: 1305 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1306 break; 1307 1308 case ir_unop_dFdx: 1309 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1310 break; 1311 case ir_unop_dFdy: 1312 op[0].negate = ~op[0].negate; 1313 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1314 break; 1315 1316 case ir_unop_noise: { 1317 /* At some point, a motivated person could add a better 1318 * implementation of noise. Currently not even the nvidia 1319 * binary drivers do anything more than this. In any case, the 1320 * place to do this is in the GL state tracker, not the poor 1321 * driver. 1322 */ 1323 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1324 break; 1325 } 1326 1327 case ir_binop_add: 1328 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1329 break; 1330 case ir_binop_sub: 1331 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1332 break; 1333 1334 case ir_binop_mul: 1335 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1336 break; 1337 case ir_binop_div: 1338 if (result_dst.type == GLSL_TYPE_FLOAT) 1339 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1340 else 1341 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1342 break; 1343 case ir_binop_mod: 1344 if (result_dst.type == GLSL_TYPE_FLOAT) 1345 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1346 else 1347 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1348 break; 1349 1350 case ir_binop_less: 1351 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1352 break; 1353 case ir_binop_greater: 1354 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); 1355 break; 1356 case ir_binop_lequal: 1357 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); 1358 break; 1359 case ir_binop_gequal: 1360 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1361 break; 1362 case ir_binop_equal: 1363 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1364 break; 1365 case ir_binop_nequal: 1366 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1367 break; 1368 case ir_binop_all_equal: 1369 /* "==" operator producing a scalar boolean. */ 1370 if (ir->operands[0]->type->is_vector() || 1371 ir->operands[1]->type->is_vector()) { 1372 st_src_reg temp = get_temp(glsl_version >= 130 ? 1373 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1374 glsl_type::vec4_type); 1375 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 1376 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1377 emit_dp(ir, result_dst, temp, temp, vector_elements); 1378 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); 1379 } else { 1380 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1381 } 1382 break; 1383 case ir_binop_any_nequal: 1384 /* "!=" operator producing a scalar boolean. */ 1385 if (ir->operands[0]->type->is_vector() || 1386 ir->operands[1]->type->is_vector()) { 1387 st_src_reg temp = get_temp(glsl_version >= 130 ? 1388 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1389 glsl_type::vec4_type); 1390 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 1391 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1392 emit_dp(ir, result_dst, temp, temp, vector_elements); 1393 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1394 } else { 1395 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1396 } 1397 break; 1398 1399 case ir_unop_any: 1400 assert(ir->operands[0]->type->is_vector()); 1401 emit_dp(ir, result_dst, op[0], op[0], 1402 ir->operands[0]->type->vector_elements); 1403 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1404 break; 1405 1406 case ir_binop_logic_xor: 1407 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1408 break; 1409 1410 case ir_binop_logic_or: 1411 /* This could be a saturated add and skip the SNE. */ 1412 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1413 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1414 break; 1415 1416 case ir_binop_logic_and: 1417 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ 1418 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1419 break; 1420 1421 case ir_binop_dot: 1422 assert(ir->operands[0]->type->is_vector()); 1423 assert(ir->operands[0]->type == ir->operands[1]->type); 1424 emit_dp(ir, result_dst, op[0], op[1], 1425 ir->operands[0]->type->vector_elements); 1426 break; 1427 1428 case ir_unop_sqrt: 1429 /* sqrt(x) = x * rsq(x). */ 1430 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1431 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1432 /* For incoming channels <= 0, set the result to 0. */ 1433 op[0].negate = ~op[0].negate; 1434 emit(ir, TGSI_OPCODE_CMP, result_dst, 1435 op[0], result_src, st_src_reg_for_float(0.0)); 1436 break; 1437 case ir_unop_rsq: 1438 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1439 break; 1440 case ir_unop_i2f: 1441 case ir_unop_b2f: 1442 if (glsl_version >= 130) { 1443 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1444 break; 1445 } 1446 case ir_unop_b2i: 1447 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ 1448 result_src = op[0]; 1449 break; 1450 case ir_unop_f2i: 1451 if (glsl_version >= 130) 1452 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1453 else 1454 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1455 break; 1456 case ir_unop_f2b: 1457 case ir_unop_i2b: 1458 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 1459 st_src_reg_for_type(result_dst.type, 0)); 1460 break; 1461 case ir_unop_trunc: 1462 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1463 break; 1464 case ir_unop_ceil: 1465 op[0].negate = ~op[0].negate; 1466 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1467 result_src.negate = ~result_src.negate; 1468 break; 1469 case ir_unop_floor: 1470 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1471 break; 1472 case ir_unop_fract: 1473 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1474 break; 1475 1476 case ir_binop_min: 1477 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1478 break; 1479 case ir_binop_max: 1480 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1481 break; 1482 case ir_binop_pow: 1483 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1484 break; 1485 1486 case ir_unop_bit_not: 1487 if (glsl_version >= 130) { 1488 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1489 break; 1490 } 1491 case ir_unop_u2f: 1492 if (glsl_version >= 130) { 1493 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1494 break; 1495 } 1496 case ir_binop_lshift: 1497 if (glsl_version >= 130) { 1498 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); 1499 break; 1500 } 1501 case ir_binop_rshift: 1502 if (glsl_version >= 130) { 1503 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); 1504 break; 1505 } 1506 case ir_binop_bit_and: 1507 if (glsl_version >= 130) { 1508 emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); 1509 break; 1510 } 1511 case ir_binop_bit_xor: 1512 if (glsl_version >= 130) { 1513 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); 1514 break; 1515 } 1516 case ir_binop_bit_or: 1517 if (glsl_version >= 130) { 1518 emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); 1519 break; 1520 } 1521 case ir_unop_round_even: 1522 assert(!"GLSL 1.30 features unsupported"); 1523 break; 1524 1525 case ir_quadop_vector: 1526 /* This operation should have already been handled. 1527 */ 1528 assert(!"Should not get here."); 1529 break; 1530 } 1531 1532 this->result = result_src; 1533} 1534 1535 1536void 1537glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1538{ 1539 st_src_reg src; 1540 int i; 1541 int swizzle[4]; 1542 1543 /* Note that this is only swizzles in expressions, not those on the left 1544 * hand side of an assignment, which do write masking. See ir_assignment 1545 * for that. 1546 */ 1547 1548 ir->val->accept(this); 1549 src = this->result; 1550 assert(src.file != PROGRAM_UNDEFINED); 1551 1552 for (i = 0; i < 4; i++) { 1553 if (i < ir->type->vector_elements) { 1554 switch (i) { 1555 case 0: 1556 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1557 break; 1558 case 1: 1559 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1560 break; 1561 case 2: 1562 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1563 break; 1564 case 3: 1565 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1566 break; 1567 } 1568 } else { 1569 /* If the type is smaller than a vec4, replicate the last 1570 * channel out. 1571 */ 1572 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1573 } 1574 } 1575 1576 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1577 1578 this->result = src; 1579} 1580 1581void 1582glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1583{ 1584 variable_storage *entry = find_variable_storage(ir->var); 1585 ir_variable *var = ir->var; 1586 1587 if (!entry) { 1588 switch (var->mode) { 1589 case ir_var_uniform: 1590 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1591 var->location); 1592 this->variables.push_tail(entry); 1593 break; 1594 case ir_var_in: 1595 case ir_var_inout: 1596 /* The linker assigns locations for varyings and attributes, 1597 * including deprecated builtins (like gl_Color), user-assign 1598 * generic attributes (glBindVertexLocation), and 1599 * user-defined varyings. 1600 * 1601 * FINISHME: We would hit this path for function arguments. Fix! 1602 */ 1603 assert(var->location != -1); 1604 entry = new(mem_ctx) variable_storage(var, 1605 PROGRAM_INPUT, 1606 var->location); 1607 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 1608 var->location >= VERT_ATTRIB_GENERIC0) { 1609 _mesa_add_attribute(this->prog->Attributes, 1610 var->name, 1611 _mesa_sizeof_glsl_type(var->type->gl_type), 1612 var->type->gl_type, 1613 var->location - VERT_ATTRIB_GENERIC0); 1614 } 1615 break; 1616 case ir_var_out: 1617 assert(var->location != -1); 1618 entry = new(mem_ctx) variable_storage(var, 1619 PROGRAM_OUTPUT, 1620 var->location); 1621 break; 1622 case ir_var_system_value: 1623 entry = new(mem_ctx) variable_storage(var, 1624 PROGRAM_SYSTEM_VALUE, 1625 var->location); 1626 break; 1627 case ir_var_auto: 1628 case ir_var_temporary: 1629 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1630 this->next_temp); 1631 this->variables.push_tail(entry); 1632 1633 next_temp += type_size(var->type); 1634 break; 1635 } 1636 1637 if (!entry) { 1638 printf("Failed to make storage for %s\n", var->name); 1639 exit(1); 1640 } 1641 } 1642 1643 this->result = st_src_reg(entry->file, entry->index, var->type); 1644 if (glsl_version <= 120) 1645 this->result.type = GLSL_TYPE_FLOAT; 1646} 1647 1648void 1649glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1650{ 1651 ir_constant *index; 1652 st_src_reg src; 1653 int element_size = type_size(ir->type); 1654 1655 index = ir->array_index->constant_expression_value(); 1656 1657 ir->array->accept(this); 1658 src = this->result; 1659 1660 if (index) { 1661 src.index += index->value.i[0] * element_size; 1662 } else { 1663 st_src_reg array_base = this->result; 1664 /* Variable index array dereference. It eats the "vec4" of the 1665 * base of the array and an index that offsets the Mesa register 1666 * index. 1667 */ 1668 ir->array_index->accept(this); 1669 1670 st_src_reg index_reg; 1671 1672 if (element_size == 1) { 1673 index_reg = this->result; 1674 } else { 1675 index_reg = get_temp(glsl_type::float_type); 1676 1677 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1678 this->result, st_src_reg_for_float(element_size)); 1679 } 1680 1681 src.reladdr = ralloc(mem_ctx, st_src_reg); 1682 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1683 } 1684 1685 /* If the type is smaller than a vec4, replicate the last channel out. */ 1686 if (ir->type->is_scalar() || ir->type->is_vector()) 1687 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1688 else 1689 src.swizzle = SWIZZLE_NOOP; 1690 1691 this->result = src; 1692} 1693 1694void 1695glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 1696{ 1697 unsigned int i; 1698 const glsl_type *struct_type = ir->record->type; 1699 int offset = 0; 1700 1701 ir->record->accept(this); 1702 1703 for (i = 0; i < struct_type->length; i++) { 1704 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1705 break; 1706 offset += type_size(struct_type->fields.structure[i].type); 1707 } 1708 1709 /* If the type is smaller than a vec4, replicate the last channel out. */ 1710 if (ir->type->is_scalar() || ir->type->is_vector()) 1711 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1712 else 1713 this->result.swizzle = SWIZZLE_NOOP; 1714 1715 this->result.index += offset; 1716} 1717 1718/** 1719 * We want to be careful in assignment setup to hit the actual storage 1720 * instead of potentially using a temporary like we might with the 1721 * ir_dereference handler. 1722 */ 1723static st_dst_reg 1724get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 1725{ 1726 /* The LHS must be a dereference. If the LHS is a variable indexed array 1727 * access of a vector, it must be separated into a series conditional moves 1728 * before reaching this point (see ir_vec_index_to_cond_assign). 1729 */ 1730 assert(ir->as_dereference()); 1731 ir_dereference_array *deref_array = ir->as_dereference_array(); 1732 if (deref_array) { 1733 assert(!deref_array->array->type->is_vector()); 1734 } 1735 1736 /* Use the rvalue deref handler for the most part. We'll ignore 1737 * swizzles in it and write swizzles using writemask, though. 1738 */ 1739 ir->accept(v); 1740 return st_dst_reg(v->result); 1741} 1742 1743/** 1744 * Process the condition of a conditional assignment 1745 * 1746 * Examines the condition of a conditional assignment to generate the optimal 1747 * first operand of a \c CMP instruction. If the condition is a relational 1748 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1749 * used as the source for the \c CMP instruction. Otherwise the comparison 1750 * is processed to a boolean result, and the boolean result is used as the 1751 * operand to the CMP instruction. 1752 */ 1753bool 1754glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 1755{ 1756 ir_rvalue *src_ir = ir; 1757 bool negate = true; 1758 bool switch_order = false; 1759 1760 ir_expression *const expr = ir->as_expression(); 1761 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 1762 bool zero_on_left = false; 1763 1764 if (expr->operands[0]->is_zero()) { 1765 src_ir = expr->operands[1]; 1766 zero_on_left = true; 1767 } else if (expr->operands[1]->is_zero()) { 1768 src_ir = expr->operands[0]; 1769 zero_on_left = false; 1770 } 1771 1772 /* a is - 0 + - 0 + 1773 * (a < 0) T F F ( a < 0) T F F 1774 * (0 < a) F F T (-a < 0) F F T 1775 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 1776 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 1777 * (a > 0) F F T (-a < 0) F F T 1778 * (0 > a) T F F ( a < 0) T F F 1779 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 1780 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 1781 * 1782 * Note that exchanging the order of 0 and 'a' in the comparison simply 1783 * means that the value of 'a' should be negated. 1784 */ 1785 if (src_ir != ir) { 1786 switch (expr->operation) { 1787 case ir_binop_less: 1788 switch_order = false; 1789 negate = zero_on_left; 1790 break; 1791 1792 case ir_binop_greater: 1793 switch_order = false; 1794 negate = !zero_on_left; 1795 break; 1796 1797 case ir_binop_lequal: 1798 switch_order = true; 1799 negate = !zero_on_left; 1800 break; 1801 1802 case ir_binop_gequal: 1803 switch_order = true; 1804 negate = zero_on_left; 1805 break; 1806 1807 default: 1808 /* This isn't the right kind of comparison afterall, so make sure 1809 * the whole condition is visited. 1810 */ 1811 src_ir = ir; 1812 break; 1813 } 1814 } 1815 } 1816 1817 src_ir->accept(this); 1818 1819 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 1820 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 1821 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 1822 * computing the condition. 1823 */ 1824 if (negate) 1825 this->result.negate = ~this->result.negate; 1826 1827 return switch_order; 1828} 1829 1830void 1831glsl_to_tgsi_visitor::visit(ir_assignment *ir) 1832{ 1833 st_dst_reg l; 1834 st_src_reg r; 1835 int i; 1836 1837 ir->rhs->accept(this); 1838 r = this->result; 1839 1840 l = get_assignment_lhs(ir->lhs, this); 1841 1842 /* FINISHME: This should really set to the correct maximal writemask for each 1843 * FINISHME: component written (in the loops below). This case can only 1844 * FINISHME: occur for matrices, arrays, and structures. 1845 */ 1846 if (ir->write_mask == 0) { 1847 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1848 l.writemask = WRITEMASK_XYZW; 1849 } else if (ir->lhs->type->is_scalar() && 1850 ir->lhs->variable_referenced()->mode == ir_var_out) { 1851 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 1852 * FINISHME: W component of fragment shader output zero, work correctly. 1853 */ 1854 l.writemask = WRITEMASK_XYZW; 1855 } else { 1856 int swizzles[4]; 1857 int first_enabled_chan = 0; 1858 int rhs_chan = 0; 1859 1860 l.writemask = ir->write_mask; 1861 1862 for (int i = 0; i < 4; i++) { 1863 if (l.writemask & (1 << i)) { 1864 first_enabled_chan = GET_SWZ(r.swizzle, i); 1865 break; 1866 } 1867 } 1868 1869 /* Swizzle a small RHS vector into the channels being written. 1870 * 1871 * glsl ir treats write_mask as dictating how many channels are 1872 * present on the RHS while Mesa IR treats write_mask as just 1873 * showing which channels of the vec4 RHS get written. 1874 */ 1875 for (int i = 0; i < 4; i++) { 1876 if (l.writemask & (1 << i)) 1877 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1878 else 1879 swizzles[i] = first_enabled_chan; 1880 } 1881 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1882 swizzles[2], swizzles[3]); 1883 } 1884 1885 assert(l.file != PROGRAM_UNDEFINED); 1886 assert(r.file != PROGRAM_UNDEFINED); 1887 1888 if (ir->condition) { 1889 const bool switch_order = this->process_move_condition(ir->condition); 1890 st_src_reg condition = this->result; 1891 1892 for (i = 0; i < type_size(ir->lhs->type); i++) { 1893 st_src_reg l_src = st_src_reg(l); 1894 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 1895 1896 if (switch_order) { 1897 emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); 1898 } else { 1899 emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); 1900 } 1901 1902 l.index++; 1903 r.index++; 1904 } 1905 } else { 1906 for (i = 0; i < type_size(ir->lhs->type); i++) { 1907 emit(ir, TGSI_OPCODE_MOV, l, r); 1908 l.index++; 1909 r.index++; 1910 } 1911 } 1912} 1913 1914 1915void 1916glsl_to_tgsi_visitor::visit(ir_constant *ir) 1917{ 1918 st_src_reg src; 1919 GLfloat stack_vals[4] = { 0 }; 1920 gl_constant_value *values = (gl_constant_value *) stack_vals; 1921 GLenum gl_type = GL_NONE; 1922 unsigned int i; 1923 1924 /* Unfortunately, 4 floats is all we can get into 1925 * _mesa_add_unnamed_constant. So, make a temp to store an 1926 * aggregate constant and move each constant value into it. If we 1927 * get lucky, copy propagation will eliminate the extra moves. 1928 */ 1929 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1930 st_src_reg temp_base = get_temp(ir->type); 1931 st_dst_reg temp = st_dst_reg(temp_base); 1932 1933 foreach_iter(exec_list_iterator, iter, ir->components) { 1934 ir_constant *field_value = (ir_constant *)iter.get(); 1935 int size = type_size(field_value->type); 1936 1937 assert(size > 0); 1938 1939 field_value->accept(this); 1940 src = this->result; 1941 1942 for (i = 0; i < (unsigned int)size; i++) { 1943 emit(ir, TGSI_OPCODE_MOV, temp, src); 1944 1945 src.index++; 1946 temp.index++; 1947 } 1948 } 1949 this->result = temp_base; 1950 return; 1951 } 1952 1953 if (ir->type->is_array()) { 1954 st_src_reg temp_base = get_temp(ir->type); 1955 st_dst_reg temp = st_dst_reg(temp_base); 1956 int size = type_size(ir->type->fields.array); 1957 1958 assert(size > 0); 1959 1960 for (i = 0; i < ir->type->length; i++) { 1961 ir->array_elements[i]->accept(this); 1962 src = this->result; 1963 for (int j = 0; j < size; j++) { 1964 emit(ir, TGSI_OPCODE_MOV, temp, src); 1965 1966 src.index++; 1967 temp.index++; 1968 } 1969 } 1970 this->result = temp_base; 1971 return; 1972 } 1973 1974 if (ir->type->is_matrix()) { 1975 st_src_reg mat = get_temp(ir->type); 1976 st_dst_reg mat_column = st_dst_reg(mat); 1977 1978 for (i = 0; i < ir->type->matrix_columns; i++) { 1979 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1980 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 1981 1982 src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); 1983 src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, 1984 values, 1985 ir->type->vector_elements, 1986 GL_FLOAT, 1987 &src.swizzle); 1988 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 1989 1990 mat_column.index++; 1991 } 1992 1993 this->result = mat; 1994 return; 1995 } 1996 1997 src.file = PROGRAM_CONSTANT; 1998 switch (ir->type->base_type) { 1999 case GLSL_TYPE_FLOAT: 2000 gl_type = GL_FLOAT; 2001 for (i = 0; i < ir->type->vector_elements; i++) { 2002 values[i].f = ir->value.f[i]; 2003 } 2004 break; 2005 case GLSL_TYPE_UINT: 2006 gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; 2007 for (i = 0; i < ir->type->vector_elements; i++) { 2008 if (glsl_version >= 130) 2009 values[i].u = ir->value.u[i]; 2010 else 2011 values[i].f = ir->value.u[i]; 2012 } 2013 break; 2014 case GLSL_TYPE_INT: 2015 gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; 2016 for (i = 0; i < ir->type->vector_elements; i++) { 2017 if (glsl_version >= 130) 2018 values[i].i = ir->value.i[i]; 2019 else 2020 values[i].f = ir->value.i[i]; 2021 } 2022 break; 2023 case GLSL_TYPE_BOOL: 2024 gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT; 2025 for (i = 0; i < ir->type->vector_elements; i++) { 2026 if (glsl_version >= 130) 2027 values[i].b = ir->value.b[i]; 2028 else 2029 values[i].f = ir->value.b[i]; 2030 } 2031 break; 2032 default: 2033 assert(!"Non-float/uint/int/bool constant"); 2034 } 2035 2036 this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); 2037 this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, 2038 values, ir->type->vector_elements, gl_type, 2039 &this->result.swizzle); 2040} 2041 2042function_entry * 2043glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2044{ 2045 function_entry *entry; 2046 2047 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2048 entry = (function_entry *)iter.get(); 2049 2050 if (entry->sig == sig) 2051 return entry; 2052 } 2053 2054 entry = ralloc(mem_ctx, function_entry); 2055 entry->sig = sig; 2056 entry->sig_id = this->next_signature_id++; 2057 entry->bgn_inst = NULL; 2058 2059 /* Allocate storage for all the parameters. */ 2060 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2061 ir_variable *param = (ir_variable *)iter.get(); 2062 variable_storage *storage; 2063 2064 storage = find_variable_storage(param); 2065 assert(!storage); 2066 2067 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2068 this->next_temp); 2069 this->variables.push_tail(storage); 2070 2071 this->next_temp += type_size(param->type); 2072 } 2073 2074 if (!sig->return_type->is_void()) { 2075 entry->return_reg = get_temp(sig->return_type); 2076 } else { 2077 entry->return_reg = undef_src; 2078 } 2079 2080 this->function_signatures.push_tail(entry); 2081 return entry; 2082} 2083 2084void 2085glsl_to_tgsi_visitor::visit(ir_call *ir) 2086{ 2087 glsl_to_tgsi_instruction *call_inst; 2088 ir_function_signature *sig = ir->get_callee(); 2089 function_entry *entry = get_function_signature(sig); 2090 int i; 2091 2092 /* Process in parameters. */ 2093 exec_list_iterator sig_iter = sig->parameters.iterator(); 2094 foreach_iter(exec_list_iterator, iter, *ir) { 2095 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2096 ir_variable *param = (ir_variable *)sig_iter.get(); 2097 2098 if (param->mode == ir_var_in || 2099 param->mode == ir_var_inout) { 2100 variable_storage *storage = find_variable_storage(param); 2101 assert(storage); 2102 2103 param_rval->accept(this); 2104 st_src_reg r = this->result; 2105 2106 st_dst_reg l; 2107 l.file = storage->file; 2108 l.index = storage->index; 2109 l.reladdr = NULL; 2110 l.writemask = WRITEMASK_XYZW; 2111 l.cond_mask = COND_TR; 2112 2113 for (i = 0; i < type_size(param->type); i++) { 2114 emit(ir, TGSI_OPCODE_MOV, l, r); 2115 l.index++; 2116 r.index++; 2117 } 2118 } 2119 2120 sig_iter.next(); 2121 } 2122 assert(!sig_iter.has_next()); 2123 2124 /* Emit call instruction */ 2125 call_inst = emit(ir, TGSI_OPCODE_CAL); 2126 call_inst->function = entry; 2127 2128 /* Process out parameters. */ 2129 sig_iter = sig->parameters.iterator(); 2130 foreach_iter(exec_list_iterator, iter, *ir) { 2131 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2132 ir_variable *param = (ir_variable *)sig_iter.get(); 2133 2134 if (param->mode == ir_var_out || 2135 param->mode == ir_var_inout) { 2136 variable_storage *storage = find_variable_storage(param); 2137 assert(storage); 2138 2139 st_src_reg r; 2140 r.file = storage->file; 2141 r.index = storage->index; 2142 r.reladdr = NULL; 2143 r.swizzle = SWIZZLE_NOOP; 2144 r.negate = 0; 2145 2146 param_rval->accept(this); 2147 st_dst_reg l = st_dst_reg(this->result); 2148 2149 for (i = 0; i < type_size(param->type); i++) { 2150 emit(ir, TGSI_OPCODE_MOV, l, r); 2151 l.index++; 2152 r.index++; 2153 } 2154 } 2155 2156 sig_iter.next(); 2157 } 2158 assert(!sig_iter.has_next()); 2159 2160 /* Process return value. */ 2161 this->result = entry->return_reg; 2162} 2163 2164void 2165glsl_to_tgsi_visitor::visit(ir_texture *ir) 2166{ 2167 st_src_reg result_src, coord, lod_info, projector, dx, dy; 2168 st_dst_reg result_dst, coord_dst; 2169 glsl_to_tgsi_instruction *inst = NULL; 2170 unsigned opcode = TGSI_OPCODE_NOP; 2171 2172 ir->coordinate->accept(this); 2173 2174 /* Put our coords in a temp. We'll need to modify them for shadow, 2175 * projection, or LOD, so the only case we'd use it as is is if 2176 * we're doing plain old texturing. Mesa IR optimization should 2177 * handle cleaning up our mess in that case. 2178 */ 2179 coord = get_temp(glsl_type::vec4_type); 2180 coord_dst = st_dst_reg(coord); 2181 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2182 2183 if (ir->projector) { 2184 ir->projector->accept(this); 2185 projector = this->result; 2186 } 2187 2188 /* Storage for our result. Ideally for an assignment we'd be using 2189 * the actual storage for the result here, instead. 2190 */ 2191 result_src = get_temp(glsl_type::vec4_type); 2192 result_dst = st_dst_reg(result_src); 2193 2194 switch (ir->op) { 2195 case ir_tex: 2196 opcode = TGSI_OPCODE_TEX; 2197 break; 2198 case ir_txb: 2199 opcode = TGSI_OPCODE_TXB; 2200 ir->lod_info.bias->accept(this); 2201 lod_info = this->result; 2202 break; 2203 case ir_txl: 2204 opcode = TGSI_OPCODE_TXL; 2205 ir->lod_info.lod->accept(this); 2206 lod_info = this->result; 2207 break; 2208 case ir_txd: 2209 opcode = TGSI_OPCODE_TXD; 2210 ir->lod_info.grad.dPdx->accept(this); 2211 dx = this->result; 2212 ir->lod_info.grad.dPdy->accept(this); 2213 dy = this->result; 2214 break; 2215 case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ 2216 assert(!"GLSL 1.30 features unsupported"); 2217 break; 2218 } 2219 2220 if (ir->projector) { 2221 if (opcode == TGSI_OPCODE_TEX) { 2222 /* Slot the projector in as the last component of the coord. */ 2223 coord_dst.writemask = WRITEMASK_W; 2224 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2225 coord_dst.writemask = WRITEMASK_XYZW; 2226 opcode = TGSI_OPCODE_TXP; 2227 } else { 2228 st_src_reg coord_w = coord; 2229 coord_w.swizzle = SWIZZLE_WWWW; 2230 2231 /* For the other TEX opcodes there's no projective version 2232 * since the last slot is taken up by LOD info. Do the 2233 * projective divide now. 2234 */ 2235 coord_dst.writemask = WRITEMASK_W; 2236 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2237 2238 /* In the case where we have to project the coordinates "by hand," 2239 * the shadow comparator value must also be projected. 2240 */ 2241 st_src_reg tmp_src = coord; 2242 if (ir->shadow_comparitor) { 2243 /* Slot the shadow value in as the second to last component of the 2244 * coord. 2245 */ 2246 ir->shadow_comparitor->accept(this); 2247 2248 tmp_src = get_temp(glsl_type::vec4_type); 2249 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2250 2251 tmp_dst.writemask = WRITEMASK_Z; 2252 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2253 2254 tmp_dst.writemask = WRITEMASK_XY; 2255 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2256 } 2257 2258 coord_dst.writemask = WRITEMASK_XYZ; 2259 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2260 2261 coord_dst.writemask = WRITEMASK_XYZW; 2262 coord.swizzle = SWIZZLE_XYZW; 2263 } 2264 } 2265 2266 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2267 * comparator was put in the correct place (and projected) by the code, 2268 * above, that handles by-hand projection. 2269 */ 2270 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2271 /* Slot the shadow value in as the second to last component of the 2272 * coord. 2273 */ 2274 ir->shadow_comparitor->accept(this); 2275 coord_dst.writemask = WRITEMASK_Z; 2276 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2277 coord_dst.writemask = WRITEMASK_XYZW; 2278 } 2279 2280 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { 2281 /* TGSI stores LOD or LOD bias in the last channel of the coords. */ 2282 coord_dst.writemask = WRITEMASK_W; 2283 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2284 coord_dst.writemask = WRITEMASK_XYZW; 2285 } 2286 2287 if (opcode == TGSI_OPCODE_TXD) 2288 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2289 else 2290 inst = emit(ir, opcode, result_dst, coord); 2291 2292 if (ir->shadow_comparitor) 2293 inst->tex_shadow = GL_TRUE; 2294 2295 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2296 this->shader_program, 2297 this->prog); 2298 2299 const glsl_type *sampler_type = ir->sampler->type; 2300 2301 switch (sampler_type->sampler_dimensionality) { 2302 case GLSL_SAMPLER_DIM_1D: 2303 inst->tex_target = (sampler_type->sampler_array) 2304 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2305 break; 2306 case GLSL_SAMPLER_DIM_2D: 2307 inst->tex_target = (sampler_type->sampler_array) 2308 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2309 break; 2310 case GLSL_SAMPLER_DIM_3D: 2311 inst->tex_target = TEXTURE_3D_INDEX; 2312 break; 2313 case GLSL_SAMPLER_DIM_CUBE: 2314 inst->tex_target = TEXTURE_CUBE_INDEX; 2315 break; 2316 case GLSL_SAMPLER_DIM_RECT: 2317 inst->tex_target = TEXTURE_RECT_INDEX; 2318 break; 2319 case GLSL_SAMPLER_DIM_BUF: 2320 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2321 break; 2322 default: 2323 assert(!"Should not get here."); 2324 } 2325 2326 this->result = result_src; 2327} 2328 2329void 2330glsl_to_tgsi_visitor::visit(ir_return *ir) 2331{ 2332 if (ir->get_value()) { 2333 st_dst_reg l; 2334 int i; 2335 2336 assert(current_function); 2337 2338 ir->get_value()->accept(this); 2339 st_src_reg r = this->result; 2340 2341 l = st_dst_reg(current_function->return_reg); 2342 2343 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2344 emit(ir, TGSI_OPCODE_MOV, l, r); 2345 l.index++; 2346 r.index++; 2347 } 2348 } 2349 2350 emit(ir, TGSI_OPCODE_RET); 2351} 2352 2353void 2354glsl_to_tgsi_visitor::visit(ir_discard *ir) 2355{ 2356 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2357 2358 if (ir->condition) { 2359 ir->condition->accept(this); 2360 this->result.negate = ~this->result.negate; 2361 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2362 } else { 2363 emit(ir, TGSI_OPCODE_KILP); 2364 } 2365 2366 fp->UsesKill = GL_TRUE; 2367} 2368 2369void 2370glsl_to_tgsi_visitor::visit(ir_if *ir) 2371{ 2372 glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; 2373 glsl_to_tgsi_instruction *prev_inst; 2374 2375 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2376 2377 ir->condition->accept(this); 2378 assert(this->result.file != PROGRAM_UNDEFINED); 2379 2380 if (this->options->EmitCondCodes) { 2381 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2382 2383 /* See if we actually generated any instruction for generating 2384 * the condition. If not, then cook up a move to a temp so we 2385 * have something to set cond_update on. 2386 */ 2387 if (cond_inst == prev_inst) { 2388 st_src_reg temp = get_temp(glsl_type::bool_type); 2389 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2390 } 2391 cond_inst->cond_update = GL_TRUE; 2392 2393 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2394 if_inst->dst.cond_mask = COND_NE; 2395 } else { 2396 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2397 } 2398 2399 this->instructions.push_tail(if_inst); 2400 2401 visit_exec_list(&ir->then_instructions, this); 2402 2403 if (!ir->else_instructions.is_empty()) { 2404 else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); 2405 visit_exec_list(&ir->else_instructions, this); 2406 } 2407 2408 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2409} 2410 2411glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2412{ 2413 result.file = PROGRAM_UNDEFINED; 2414 next_temp = 1; 2415 next_signature_id = 1; 2416 current_function = NULL; 2417 num_address_regs = 0; 2418 indirect_addr_temps = false; 2419 indirect_addr_consts = false; 2420 mem_ctx = ralloc_context(NULL); 2421} 2422 2423glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2424{ 2425 ralloc_free(mem_ctx); 2426} 2427 2428extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2429{ 2430 delete v; 2431} 2432 2433 2434/** 2435 * Count resources used by the given gpu program (number of texture 2436 * samplers, etc). 2437 */ 2438static void 2439count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2440{ 2441 v->samplers_used = 0; 2442 2443 foreach_iter(exec_list_iterator, iter, v->instructions) { 2444 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2445 2446 if (is_tex_instruction(inst->op)) { 2447 v->samplers_used |= 1 << inst->sampler; 2448 2449 prog->SamplerTargets[inst->sampler] = 2450 (gl_texture_index)inst->tex_target; 2451 if (inst->tex_shadow) { 2452 prog->ShadowSamplers |= 1 << inst->sampler; 2453 } 2454 } 2455 } 2456 2457 prog->SamplersUsed = v->samplers_used; 2458 _mesa_update_shader_textures_used(prog); 2459} 2460 2461 2462/** 2463 * Check if the given vertex/fragment/shader program is within the 2464 * resource limits of the context (number of texture units, etc). 2465 * If any of those checks fail, record a linker error. 2466 * 2467 * XXX more checks are needed... 2468 */ 2469static void 2470check_resources(const struct gl_context *ctx, 2471 struct gl_shader_program *shader_program, 2472 glsl_to_tgsi_visitor *prog, 2473 struct gl_program *proginfo) 2474{ 2475 switch (proginfo->Target) { 2476 case GL_VERTEX_PROGRAM_ARB: 2477 if (_mesa_bitcount(prog->samplers_used) > 2478 ctx->Const.MaxVertexTextureImageUnits) { 2479 fail_link(shader_program, "Too many vertex shader texture samplers"); 2480 } 2481 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2482 fail_link(shader_program, "Too many vertex shader constants"); 2483 } 2484 break; 2485 case MESA_GEOMETRY_PROGRAM: 2486 if (_mesa_bitcount(prog->samplers_used) > 2487 ctx->Const.MaxGeometryTextureImageUnits) { 2488 fail_link(shader_program, "Too many geometry shader texture samplers"); 2489 } 2490 if (proginfo->Parameters->NumParameters > 2491 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { 2492 fail_link(shader_program, "Too many geometry shader constants"); 2493 } 2494 break; 2495 case GL_FRAGMENT_PROGRAM_ARB: 2496 if (_mesa_bitcount(prog->samplers_used) > 2497 ctx->Const.MaxTextureImageUnits) { 2498 fail_link(shader_program, "Too many fragment shader texture samplers"); 2499 } 2500 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2501 fail_link(shader_program, "Too many fragment shader constants"); 2502 } 2503 break; 2504 default: 2505 _mesa_problem(ctx, "unexpected program type in check_resources()"); 2506 } 2507} 2508 2509 2510 2511struct uniform_sort { 2512 struct gl_uniform *u; 2513 int pos; 2514}; 2515 2516/* The shader_program->Uniforms list is almost sorted in increasing 2517 * uniform->{Frag,Vert}Pos locations, but not quite when there are 2518 * uniforms shared between targets. We need to add parameters in 2519 * increasing order for the targets. 2520 */ 2521static int 2522sort_uniforms(const void *a, const void *b) 2523{ 2524 struct uniform_sort *u1 = (struct uniform_sort *)a; 2525 struct uniform_sort *u2 = (struct uniform_sort *)b; 2526 2527 return u1->pos - u2->pos; 2528} 2529 2530/* Add the uniforms to the parameters. The linker chose locations 2531 * in our parameters lists (which weren't created yet), which the 2532 * uniforms code will use to poke values into our parameters list 2533 * when uniforms are updated. 2534 */ 2535static void 2536add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, 2537 struct gl_shader *shader, 2538 struct gl_program *prog) 2539{ 2540 unsigned int i; 2541 unsigned int next_sampler = 0, num_uniforms = 0; 2542 struct uniform_sort *sorted_uniforms; 2543 2544 sorted_uniforms = ralloc_array(NULL, struct uniform_sort, 2545 shader_program->Uniforms->NumUniforms); 2546 2547 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { 2548 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; 2549 int parameter_index = -1; 2550 2551 switch (shader->Type) { 2552 case GL_VERTEX_SHADER: 2553 parameter_index = uniform->VertPos; 2554 break; 2555 case GL_FRAGMENT_SHADER: 2556 parameter_index = uniform->FragPos; 2557 break; 2558 case GL_GEOMETRY_SHADER: 2559 parameter_index = uniform->GeomPos; 2560 break; 2561 } 2562 2563 /* Only add uniforms used in our target. */ 2564 if (parameter_index != -1) { 2565 sorted_uniforms[num_uniforms].pos = parameter_index; 2566 sorted_uniforms[num_uniforms].u = uniform; 2567 num_uniforms++; 2568 } 2569 } 2570 2571 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), 2572 sort_uniforms); 2573 2574 for (i = 0; i < num_uniforms; i++) { 2575 struct gl_uniform *uniform = sorted_uniforms[i].u; 2576 int parameter_index = sorted_uniforms[i].pos; 2577 const glsl_type *type = uniform->Type; 2578 unsigned int size; 2579 2580 if (type->is_vector() || 2581 type->is_scalar()) { 2582 size = type->vector_elements; 2583 } else { 2584 size = type_size(type) * 4; 2585 } 2586 2587 gl_register_file file; 2588 if (type->is_sampler() || 2589 (type->is_array() && type->fields.array->is_sampler())) { 2590 file = PROGRAM_SAMPLER; 2591 } else { 2592 file = PROGRAM_UNIFORM; 2593 } 2594 2595 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, 2596 uniform->Name); 2597 2598 if (index < 0) { 2599 index = _mesa_add_parameter(prog->Parameters, file, 2600 uniform->Name, size, type->gl_type, 2601 NULL, NULL, 0x0); 2602 2603 /* Sampler uniform values are stored in prog->SamplerUnits, 2604 * and the entry in that array is selected by this index we 2605 * store in ParameterValues[]. 2606 */ 2607 if (file == PROGRAM_SAMPLER) { 2608 for (unsigned int j = 0; j < size / 4; j++) 2609 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; 2610 } 2611 2612 /* The location chosen in the Parameters list here (returned 2613 * from _mesa_add_uniform) has to match what the linker chose. 2614 */ 2615 if (index != parameter_index) { 2616 fail_link(shader_program, "Allocation of uniform `%s' to target " 2617 "failed (%d vs %d)\n", 2618 uniform->Name, index, parameter_index); 2619 } 2620 } 2621 } 2622 2623 ralloc_free(sorted_uniforms); 2624} 2625 2626static void 2627set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2628 struct gl_shader_program *shader_program, 2629 const char *name, const glsl_type *type, 2630 ir_constant *val) 2631{ 2632 if (type->is_record()) { 2633 ir_constant *field_constant; 2634 2635 field_constant = (ir_constant *)val->components.get_head(); 2636 2637 for (unsigned int i = 0; i < type->length; i++) { 2638 const glsl_type *field_type = type->fields.structure[i].type; 2639 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2640 type->fields.structure[i].name); 2641 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2642 field_type, field_constant); 2643 field_constant = (ir_constant *)field_constant->next; 2644 } 2645 return; 2646 } 2647 2648 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2649 2650 if (loc == -1) { 2651 fail_link(shader_program, 2652 "Couldn't find uniform for initializer %s\n", name); 2653 return; 2654 } 2655 2656 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { 2657 ir_constant *element; 2658 const glsl_type *element_type; 2659 if (type->is_array()) { 2660 element = val->array_elements[i]; 2661 element_type = type->fields.array; 2662 } else { 2663 element = val; 2664 element_type = type; 2665 } 2666 2667 void *values; 2668 2669 if (element_type->base_type == GLSL_TYPE_BOOL) { 2670 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2671 for (unsigned int j = 0; j < element_type->components(); j++) { 2672 conv[j] = element->value.b[j]; 2673 } 2674 values = (void *)conv; 2675 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2676 element_type->vector_elements, 2677 1); 2678 } else { 2679 values = &element->value; 2680 } 2681 2682 if (element_type->is_matrix()) { 2683 _mesa_uniform_matrix(ctx, shader_program, 2684 element_type->matrix_columns, 2685 element_type->vector_elements, 2686 loc, 1, GL_FALSE, (GLfloat *)values); 2687 loc += element_type->matrix_columns; 2688 } else { 2689 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2690 values, element_type->gl_type); 2691 loc += type_size(element_type); 2692 } 2693 } 2694} 2695 2696static void 2697set_uniform_initializers(struct gl_context *ctx, 2698 struct gl_shader_program *shader_program) 2699{ 2700 void *mem_ctx = NULL; 2701 2702 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { 2703 struct gl_shader *shader = shader_program->_LinkedShaders[i]; 2704 2705 if (shader == NULL) 2706 continue; 2707 2708 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2709 ir_instruction *ir = (ir_instruction *)iter.get(); 2710 ir_variable *var = ir->as_variable(); 2711 2712 if (!var || var->mode != ir_var_uniform || !var->constant_value) 2713 continue; 2714 2715 if (!mem_ctx) 2716 mem_ctx = ralloc_context(NULL); 2717 2718 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, 2719 var->type, var->constant_value); 2720 } 2721 } 2722 2723 ralloc_free(mem_ctx); 2724} 2725 2726/* 2727 * Scan/rewrite program to remove reads of custom (output) registers. 2728 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 2729 * (for vertex shaders). 2730 * In GLSL shaders, varying vars can be read and written. 2731 * On some hardware, trying to read an output register causes trouble. 2732 * So, rewrite the program to use a temporary register in this case. 2733 * 2734 * Based on _mesa_remove_output_reads from programopt.c. 2735 */ 2736void 2737glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 2738{ 2739 GLuint i; 2740 GLint outputMap[VERT_RESULT_MAX]; 2741 GLint outputTypes[VERT_RESULT_MAX]; 2742 GLuint numVaryingReads = 0; 2743 GLboolean usedTemps[MAX_PROGRAM_TEMPS]; 2744 GLuint firstTemp = 0; 2745 2746 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 2747 usedTemps, MAX_PROGRAM_TEMPS); 2748 2749 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 2750 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 2751 2752 for (i = 0; i < VERT_RESULT_MAX; i++) 2753 outputMap[i] = -1; 2754 2755 /* look for instructions which read from varying vars */ 2756 foreach_iter(exec_list_iterator, iter, this->instructions) { 2757 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2758 const GLuint numSrc = num_inst_src_regs(inst->op); 2759 GLuint j; 2760 for (j = 0; j < numSrc; j++) { 2761 if (inst->src[j].file == type) { 2762 /* replace the read with a temp reg */ 2763 const GLuint var = inst->src[j].index; 2764 if (outputMap[var] == -1) { 2765 numVaryingReads++; 2766 outputMap[var] = _mesa_find_free_register(usedTemps, 2767 MAX_PROGRAM_TEMPS, 2768 firstTemp); 2769 outputTypes[var] = inst->src[j].type; 2770 firstTemp = outputMap[var] + 1; 2771 } 2772 inst->src[j].file = PROGRAM_TEMPORARY; 2773 inst->src[j].index = outputMap[var]; 2774 } 2775 } 2776 } 2777 2778 if (numVaryingReads == 0) 2779 return; /* nothing to be done */ 2780 2781 /* look for instructions which write to the varying vars identified above */ 2782 foreach_iter(exec_list_iterator, iter, this->instructions) { 2783 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2784 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 2785 /* change inst to write to the temp reg, instead of the varying */ 2786 inst->dst.file = PROGRAM_TEMPORARY; 2787 inst->dst.index = outputMap[inst->dst.index]; 2788 } 2789 } 2790 2791 /* insert new MOV instructions at the end */ 2792 for (i = 0; i < VERT_RESULT_MAX; i++) { 2793 if (outputMap[i] >= 0) { 2794 /* MOV VAR[i], TEMP[tmp]; */ 2795 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); 2796 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); 2797 dst.index = i; 2798 this->emit(NULL, TGSI_OPCODE_MOV, dst, src); 2799 } 2800 } 2801} 2802 2803/** 2804 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 2805 * are read from the given src in this instruction 2806 */ 2807static int 2808get_src_arg_mask(st_dst_reg dst, st_src_reg src) 2809{ 2810 int read_mask = 0, comp; 2811 2812 /* Now, given the src swizzle and the written channels, find which 2813 * components are actually read 2814 */ 2815 for (comp = 0; comp < 4; ++comp) { 2816 const unsigned coord = GET_SWZ(src.swizzle, comp); 2817 ASSERT(coord < 4); 2818 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 2819 read_mask |= 1 << coord; 2820 } 2821 2822 return read_mask; 2823} 2824 2825/** 2826 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 2827 * instruction is the first instruction to write to register T0. There are 2828 * several lowering passes done in GLSL IR (e.g. branches and 2829 * relative addressing) that create a large number of conditional assignments 2830 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 2831 * 2832 * Here is why this conversion is safe: 2833 * CMP T0, T1 T2 T0 can be expanded to: 2834 * if (T1 < 0.0) 2835 * MOV T0, T2; 2836 * else 2837 * MOV T0, T0; 2838 * 2839 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 2840 * as the original program. If (T1 < 0.0) evaluates to false, executing 2841 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 2842 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 2843 * because any instruction that was going to read from T0 after this was going 2844 * to read a garbage value anyway. 2845 */ 2846void 2847glsl_to_tgsi_visitor::simplify_cmp(void) 2848{ 2849 unsigned tempWrites[MAX_PROGRAM_TEMPS]; 2850 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 2851 2852 memset(tempWrites, 0, sizeof(tempWrites)); 2853 memset(outputWrites, 0, sizeof(outputWrites)); 2854 2855 foreach_iter(exec_list_iterator, iter, this->instructions) { 2856 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2857 unsigned prevWriteMask = 0; 2858 2859 /* Give up if we encounter relative addressing or flow control. */ 2860 if (inst->dst.reladdr || 2861 tgsi_get_opcode_info(inst->op)->is_branch || 2862 inst->op == TGSI_OPCODE_BGNSUB || 2863 inst->op == TGSI_OPCODE_CONT || 2864 inst->op == TGSI_OPCODE_END || 2865 inst->op == TGSI_OPCODE_ENDSUB || 2866 inst->op == TGSI_OPCODE_RET) { 2867 return; 2868 } 2869 2870 if (inst->dst.file == PROGRAM_OUTPUT) { 2871 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 2872 prevWriteMask = outputWrites[inst->dst.index]; 2873 outputWrites[inst->dst.index] |= inst->dst.writemask; 2874 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 2875 assert(inst->dst.index < MAX_PROGRAM_TEMPS); 2876 prevWriteMask = tempWrites[inst->dst.index]; 2877 tempWrites[inst->dst.index] |= inst->dst.writemask; 2878 } 2879 2880 /* For a CMP to be considered a conditional write, the destination 2881 * register and source register two must be the same. */ 2882 if (inst->op == TGSI_OPCODE_CMP 2883 && !(inst->dst.writemask & prevWriteMask) 2884 && inst->src[2].file == inst->dst.file 2885 && inst->src[2].index == inst->dst.index 2886 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 2887 2888 inst->op = TGSI_OPCODE_MOV; 2889 inst->src[0] = inst->src[1]; 2890 } 2891 } 2892} 2893 2894/* Replaces all references to a temporary register index with another index. */ 2895void 2896glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 2897{ 2898 foreach_iter(exec_list_iterator, iter, this->instructions) { 2899 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2900 unsigned j; 2901 2902 for (j=0; j < num_inst_src_regs(inst->op); j++) { 2903 if (inst->src[j].file == PROGRAM_TEMPORARY && 2904 inst->src[j].index == index) { 2905 inst->src[j].index = new_index; 2906 } 2907 } 2908 2909 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 2910 inst->dst.index = new_index; 2911 } 2912 } 2913} 2914 2915int 2916glsl_to_tgsi_visitor::get_first_temp_read(int index) 2917{ 2918 int depth = 0; /* loop depth */ 2919 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 2920 unsigned i = 0, j; 2921 2922 foreach_iter(exec_list_iterator, iter, this->instructions) { 2923 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2924 2925 for (j=0; j < num_inst_src_regs(inst->op); j++) { 2926 if (inst->src[j].file == PROGRAM_TEMPORARY && 2927 inst->src[j].index == index) { 2928 return (depth == 0) ? i : loop_start; 2929 } 2930 } 2931 2932 if (inst->op == TGSI_OPCODE_BGNLOOP) { 2933 if(depth++ == 0) 2934 loop_start = i; 2935 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 2936 if (--depth == 0) 2937 loop_start = -1; 2938 } 2939 assert(depth >= 0); 2940 2941 i++; 2942 } 2943 2944 return -1; 2945} 2946 2947int 2948glsl_to_tgsi_visitor::get_first_temp_write(int index) 2949{ 2950 int depth = 0; /* loop depth */ 2951 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 2952 int i = 0; 2953 2954 foreach_iter(exec_list_iterator, iter, this->instructions) { 2955 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2956 2957 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 2958 return (depth == 0) ? i : loop_start; 2959 } 2960 2961 if (inst->op == TGSI_OPCODE_BGNLOOP) { 2962 if(depth++ == 0) 2963 loop_start = i; 2964 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 2965 if (--depth == 0) 2966 loop_start = -1; 2967 } 2968 assert(depth >= 0); 2969 2970 i++; 2971 } 2972 2973 return -1; 2974} 2975 2976int 2977glsl_to_tgsi_visitor::get_last_temp_read(int index) 2978{ 2979 int depth = 0; /* loop depth */ 2980 int last = -1; /* index of last instruction that reads the temporary */ 2981 unsigned i = 0, j; 2982 2983 foreach_iter(exec_list_iterator, iter, this->instructions) { 2984 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2985 2986 for (j=0; j < num_inst_src_regs(inst->op); j++) { 2987 if (inst->src[j].file == PROGRAM_TEMPORARY && 2988 inst->src[j].index == index) { 2989 last = (depth == 0) ? i : -2; 2990 } 2991 } 2992 2993 if (inst->op == TGSI_OPCODE_BGNLOOP) 2994 depth++; 2995 else if (inst->op == TGSI_OPCODE_ENDLOOP) 2996 if (--depth == 0 && last == -2) 2997 last = i; 2998 assert(depth >= 0); 2999 3000 i++; 3001 } 3002 3003 assert(last >= -1); 3004 return last; 3005} 3006 3007int 3008glsl_to_tgsi_visitor::get_last_temp_write(int index) 3009{ 3010 int depth = 0; /* loop depth */ 3011 int last = -1; /* index of last instruction that writes to the temporary */ 3012 int i = 0; 3013 3014 foreach_iter(exec_list_iterator, iter, this->instructions) { 3015 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3016 3017 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3018 last = (depth == 0) ? i : -2; 3019 3020 if (inst->op == TGSI_OPCODE_BGNLOOP) 3021 depth++; 3022 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3023 if (--depth == 0 && last == -2) 3024 last = i; 3025 assert(depth >= 0); 3026 3027 i++; 3028 } 3029 3030 assert(last >= -1); 3031 return last; 3032} 3033 3034/* 3035 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3036 * channels for copy propagation and updates following instructions to 3037 * use the original versions. 3038 * 3039 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3040 * will occur. As an example, a TXP production before this pass: 3041 * 3042 * 0: MOV TEMP[1], INPUT[4].xyyy; 3043 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3044 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3045 * 3046 * and after: 3047 * 3048 * 0: MOV TEMP[1], INPUT[4].xyyy; 3049 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3050 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3051 * 3052 * which allows for dead code elimination on TEMP[1]'s writes. 3053 */ 3054void 3055glsl_to_tgsi_visitor::copy_propagate(void) 3056{ 3057 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3058 glsl_to_tgsi_instruction *, 3059 this->next_temp * 4); 3060 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3061 int level = 0; 3062 3063 foreach_iter(exec_list_iterator, iter, this->instructions) { 3064 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3065 3066 assert(inst->dst.file != PROGRAM_TEMPORARY 3067 || inst->dst.index < this->next_temp); 3068 3069 /* First, do any copy propagation possible into the src regs. */ 3070 for (int r = 0; r < 3; r++) { 3071 glsl_to_tgsi_instruction *first = NULL; 3072 bool good = true; 3073 int acp_base = inst->src[r].index * 4; 3074 3075 if (inst->src[r].file != PROGRAM_TEMPORARY || 3076 inst->src[r].reladdr) 3077 continue; 3078 3079 /* See if we can find entries in the ACP consisting of MOVs 3080 * from the same src register for all the swizzled channels 3081 * of this src register reference. 3082 */ 3083 for (int i = 0; i < 4; i++) { 3084 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3085 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3086 3087 if (!copy_chan) { 3088 good = false; 3089 break; 3090 } 3091 3092 assert(acp_level[acp_base + src_chan] <= level); 3093 3094 if (!first) { 3095 first = copy_chan; 3096 } else { 3097 if (first->src[0].file != copy_chan->src[0].file || 3098 first->src[0].index != copy_chan->src[0].index) { 3099 good = false; 3100 break; 3101 } 3102 } 3103 } 3104 3105 if (good) { 3106 /* We've now validated that we can copy-propagate to 3107 * replace this src register reference. Do it. 3108 */ 3109 inst->src[r].file = first->src[0].file; 3110 inst->src[r].index = first->src[0].index; 3111 3112 int swizzle = 0; 3113 for (int i = 0; i < 4; i++) { 3114 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3115 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3116 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3117 (3 * i)); 3118 } 3119 inst->src[r].swizzle = swizzle; 3120 } 3121 } 3122 3123 switch (inst->op) { 3124 case TGSI_OPCODE_BGNLOOP: 3125 case TGSI_OPCODE_ENDLOOP: 3126 /* End of a basic block, clear the ACP entirely. */ 3127 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3128 break; 3129 3130 case TGSI_OPCODE_IF: 3131 ++level; 3132 break; 3133 3134 case TGSI_OPCODE_ENDIF: 3135 case TGSI_OPCODE_ELSE: 3136 /* Clear all channels written inside the block from the ACP, but 3137 * leaving those that were not touched. 3138 */ 3139 for (int r = 0; r < this->next_temp; r++) { 3140 for (int c = 0; c < 4; c++) { 3141 if (!acp[4 * r + c]) 3142 continue; 3143 3144 if (acp_level[4 * r + c] >= level) 3145 acp[4 * r + c] = NULL; 3146 } 3147 } 3148 if (inst->op == TGSI_OPCODE_ENDIF) 3149 --level; 3150 break; 3151 3152 default: 3153 /* Continuing the block, clear any written channels from 3154 * the ACP. 3155 */ 3156 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3157 /* Any temporary might be written, so no copy propagation 3158 * across this instruction. 3159 */ 3160 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3161 } else if (inst->dst.file == PROGRAM_OUTPUT && 3162 inst->dst.reladdr) { 3163 /* Any output might be written, so no copy propagation 3164 * from outputs across this instruction. 3165 */ 3166 for (int r = 0; r < this->next_temp; r++) { 3167 for (int c = 0; c < 4; c++) { 3168 if (!acp[4 * r + c]) 3169 continue; 3170 3171 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3172 acp[4 * r + c] = NULL; 3173 } 3174 } 3175 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3176 inst->dst.file == PROGRAM_OUTPUT) { 3177 /* Clear where it's used as dst. */ 3178 if (inst->dst.file == PROGRAM_TEMPORARY) { 3179 for (int c = 0; c < 4; c++) { 3180 if (inst->dst.writemask & (1 << c)) { 3181 acp[4 * inst->dst.index + c] = NULL; 3182 } 3183 } 3184 } 3185 3186 /* Clear where it's used as src. */ 3187 for (int r = 0; r < this->next_temp; r++) { 3188 for (int c = 0; c < 4; c++) { 3189 if (!acp[4 * r + c]) 3190 continue; 3191 3192 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3193 3194 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3195 acp[4 * r + c]->src[0].index == inst->dst.index && 3196 inst->dst.writemask & (1 << src_chan)) 3197 { 3198 acp[4 * r + c] = NULL; 3199 } 3200 } 3201 } 3202 } 3203 break; 3204 } 3205 3206 /* If this is a copy, add it to the ACP. */ 3207 if (inst->op == TGSI_OPCODE_MOV && 3208 inst->dst.file == PROGRAM_TEMPORARY && 3209 !inst->dst.reladdr && 3210 !inst->saturate && 3211 !inst->src[0].reladdr && 3212 !inst->src[0].negate) { 3213 for (int i = 0; i < 4; i++) { 3214 if (inst->dst.writemask & (1 << i)) { 3215 acp[4 * inst->dst.index + i] = inst; 3216 acp_level[4 * inst->dst.index + i] = level; 3217 } 3218 } 3219 } 3220 } 3221 3222 ralloc_free(acp_level); 3223 ralloc_free(acp); 3224} 3225 3226/* 3227 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3228 * 3229 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3230 * will occur. As an example, a TXP production after copy propagation but 3231 * before this pass: 3232 * 3233 * 0: MOV TEMP[1], INPUT[4].xyyy; 3234 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3235 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3236 * 3237 * and after this pass: 3238 * 3239 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3240 * 3241 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3242 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3243 */ 3244void 3245glsl_to_tgsi_visitor::eliminate_dead_code(void) 3246{ 3247 int i; 3248 3249 for (i=0; i < this->next_temp; i++) { 3250 int last_read = get_last_temp_read(i); 3251 int j = 0; 3252 3253 foreach_iter(exec_list_iterator, iter, this->instructions) { 3254 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3255 3256 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3257 j > last_read) 3258 { 3259 iter.remove(); 3260 delete inst; 3261 } 3262 3263 j++; 3264 } 3265 } 3266} 3267 3268/* 3269 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3270 * code elimination. This is less primitive than eliminate_dead_code(), as it 3271 * is per-channel and can detect consecutive writes without a read between them 3272 * as dead code. However, there is some dead code that can be eliminated by 3273 * eliminate_dead_code() but not this function - for example, this function 3274 * cannot eliminate an instruction writing to a register that is never read and 3275 * is the only instruction writing to that register. 3276 * 3277 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3278 * will occur. 3279 */ 3280int 3281glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3282{ 3283 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3284 glsl_to_tgsi_instruction *, 3285 this->next_temp * 4); 3286 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3287 int level = 0; 3288 int removed = 0; 3289 3290 foreach_iter(exec_list_iterator, iter, this->instructions) { 3291 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3292 3293 assert(inst->dst.file != PROGRAM_TEMPORARY 3294 || inst->dst.index < this->next_temp); 3295 3296 switch (inst->op) { 3297 case TGSI_OPCODE_BGNLOOP: 3298 case TGSI_OPCODE_ENDLOOP: 3299 /* End of a basic block, clear the write array entirely. 3300 * FIXME: This keeps us from killing dead code when the writes are 3301 * on either side of a loop, even when the register isn't touched 3302 * inside the loop. 3303 */ 3304 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3305 break; 3306 3307 case TGSI_OPCODE_IF: 3308 ++level; 3309 break; 3310 3311 case TGSI_OPCODE_ENDIF: 3312 --level; 3313 break; 3314 3315 case TGSI_OPCODE_ELSE: 3316 /* Clear all channels written inside the preceding if block from the 3317 * write array, but leave those that were not touched. 3318 * 3319 * FIXME: This destroys opportunities to remove dead code inside of 3320 * IF blocks that are followed by an ELSE block. 3321 */ 3322 for (int r = 0; r < this->next_temp; r++) { 3323 for (int c = 0; c < 4; c++) { 3324 if (!writes[4 * r + c]) 3325 continue; 3326 3327 if (write_level[4 * r + c] >= level) 3328 writes[4 * r + c] = NULL; 3329 } 3330 } 3331 break; 3332 3333 default: 3334 /* Continuing the block, clear any channels from the write array that 3335 * are read by this instruction. 3336 */ 3337 for (int i = 0; i < 4; i++) { 3338 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3339 /* Any temporary might be read, so no dead code elimination 3340 * across this instruction. 3341 */ 3342 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3343 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3344 /* Clear where it's used as src. */ 3345 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3346 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3347 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3348 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3349 3350 for (int c = 0; c < 4; c++) { 3351 if (src_chans & (1 << c)) { 3352 writes[4 * inst->src[i].index + c] = NULL; 3353 } 3354 } 3355 } 3356 } 3357 break; 3358 } 3359 3360 /* If this instruction writes to a temporary, add it to the write array. 3361 * If there is already an instruction in the write array for one or more 3362 * of the channels, flag that channel write as dead. 3363 */ 3364 if (inst->dst.file == PROGRAM_TEMPORARY && 3365 !inst->dst.reladdr && 3366 !inst->saturate) { 3367 for (int c = 0; c < 4; c++) { 3368 if (inst->dst.writemask & (1 << c)) { 3369 if (writes[4 * inst->dst.index + c]) { 3370 if (write_level[4 * inst->dst.index + c] < level) 3371 continue; 3372 else 3373 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3374 } 3375 writes[4 * inst->dst.index + c] = inst; 3376 write_level[4 * inst->dst.index + c] = level; 3377 } 3378 } 3379 } 3380 } 3381 3382 /* Anything still in the write array at this point is dead code. */ 3383 for (int r = 0; r < this->next_temp; r++) { 3384 for (int c = 0; c < 4; c++) { 3385 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3386 if (inst) 3387 inst->dead_mask |= (1 << c); 3388 } 3389 } 3390 3391 /* Now actually remove the instructions that are completely dead and update 3392 * the writemask of other instructions with dead channels. 3393 */ 3394 foreach_iter(exec_list_iterator, iter, this->instructions) { 3395 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3396 3397 if (!inst->dead_mask || !inst->dst.writemask) 3398 continue; 3399 else if (inst->dead_mask == inst->dst.writemask) { 3400 iter.remove(); 3401 delete inst; 3402 removed++; 3403 } else 3404 inst->dst.writemask &= ~(inst->dead_mask); 3405 } 3406 3407 ralloc_free(write_level); 3408 ralloc_free(writes); 3409 3410 return removed; 3411} 3412 3413/* Merges temporary registers together where possible to reduce the number of 3414 * registers needed to run a program. 3415 * 3416 * Produces optimal code only after copy propagation and dead code elimination 3417 * have been run. */ 3418void 3419glsl_to_tgsi_visitor::merge_registers(void) 3420{ 3421 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3422 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3423 int i, j; 3424 3425 /* Read the indices of the last read and first write to each temp register 3426 * into an array so that we don't have to traverse the instruction list as 3427 * much. */ 3428 for (i=0; i < this->next_temp; i++) { 3429 last_reads[i] = get_last_temp_read(i); 3430 first_writes[i] = get_first_temp_write(i); 3431 } 3432 3433 /* Start looking for registers with non-overlapping usages that can be 3434 * merged together. */ 3435 for (i=0; i < this->next_temp; i++) { 3436 /* Don't touch unused registers. */ 3437 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3438 3439 for (j=0; j < this->next_temp; j++) { 3440 /* Don't touch unused registers. */ 3441 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3442 3443 /* We can merge the two registers if the first write to j is after or 3444 * in the same instruction as the last read from i. Note that the 3445 * register at index i will always be used earlier or at the same time 3446 * as the register at index j. */ 3447 if (first_writes[i] <= first_writes[j] && 3448 last_reads[i] <= first_writes[j]) 3449 { 3450 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3451 3452 /* Update the first_writes and last_reads arrays with the new 3453 * values for the merged register index, and mark the newly unused 3454 * register index as such. */ 3455 last_reads[i] = last_reads[j]; 3456 first_writes[j] = -1; 3457 last_reads[j] = -1; 3458 } 3459 } 3460 } 3461 3462 ralloc_free(last_reads); 3463 ralloc_free(first_writes); 3464} 3465 3466/* Reassign indices to temporary registers by reusing unused indices created 3467 * by optimization passes. */ 3468void 3469glsl_to_tgsi_visitor::renumber_registers(void) 3470{ 3471 int i = 0; 3472 int new_index = 0; 3473 3474 for (i=0; i < this->next_temp; i++) { 3475 if (get_first_temp_read(i) < 0) continue; 3476 if (i != new_index) 3477 rename_temp_register(i, new_index); 3478 new_index++; 3479 } 3480 3481 this->next_temp = new_index; 3482} 3483 3484/* ------------------------- TGSI conversion stuff -------------------------- */ 3485struct label { 3486 unsigned branch_target; 3487 unsigned token; 3488}; 3489 3490/** 3491 * Intermediate state used during shader translation. 3492 */ 3493struct st_translate { 3494 struct ureg_program *ureg; 3495 3496 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 3497 struct ureg_src *constants; 3498 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 3499 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 3500 struct ureg_dst address[1]; 3501 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 3502 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 3503 3504 /* Extra info for handling point size clamping in vertex shader */ 3505 struct ureg_dst pointSizeResult; /**< Actual point size output register */ 3506 struct ureg_src pointSizeConst; /**< Point size range constant register */ 3507 GLint pointSizeOutIndex; /**< Temp point size output register */ 3508 GLboolean prevInstWrotePointSize; 3509 3510 const GLuint *inputMapping; 3511 const GLuint *outputMapping; 3512 3513 /* For every instruction that contains a label (eg CALL), keep 3514 * details so that we can go back afterwards and emit the correct 3515 * tgsi instruction number for each label. 3516 */ 3517 struct label *labels; 3518 unsigned labels_size; 3519 unsigned labels_count; 3520 3521 /* Keep a record of the tgsi instruction number that each mesa 3522 * instruction starts at, will be used to fix up labels after 3523 * translation. 3524 */ 3525 unsigned *insn; 3526 unsigned insn_size; 3527 unsigned insn_count; 3528 3529 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 3530 3531 boolean error; 3532}; 3533 3534/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 3535static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 3536 TGSI_SEMANTIC_FACE, 3537 TGSI_SEMANTIC_INSTANCEID 3538}; 3539 3540/** 3541 * Make note of a branch to a label in the TGSI code. 3542 * After we've emitted all instructions, we'll go over the list 3543 * of labels built here and patch the TGSI code with the actual 3544 * location of each label. 3545 */ 3546static unsigned *get_label( struct st_translate *t, 3547 unsigned branch_target ) 3548{ 3549 unsigned i; 3550 3551 if (t->labels_count + 1 >= t->labels_size) { 3552 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3553 t->labels = (struct label *)realloc(t->labels, 3554 t->labels_size * sizeof t->labels[0]); 3555 if (t->labels == NULL) { 3556 static unsigned dummy; 3557 t->error = TRUE; 3558 return &dummy; 3559 } 3560 } 3561 3562 i = t->labels_count++; 3563 t->labels[i].branch_target = branch_target; 3564 return &t->labels[i].token; 3565} 3566 3567/** 3568 * Called prior to emitting the TGSI code for each Mesa instruction. 3569 * Allocate additional space for instructions if needed. 3570 * Update the insn[] array so the next Mesa instruction points to 3571 * the next TGSI instruction. 3572 */ 3573static void set_insn_start( struct st_translate *t, 3574 unsigned start ) 3575{ 3576 if (t->insn_count + 1 >= t->insn_size) { 3577 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3578 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); 3579 if (t->insn == NULL) { 3580 t->error = TRUE; 3581 return; 3582 } 3583 } 3584 3585 t->insn[t->insn_count++] = start; 3586} 3587 3588/** 3589 * Map a Mesa dst register to a TGSI ureg_dst register. 3590 */ 3591static struct ureg_dst 3592dst_register( struct st_translate *t, 3593 gl_register_file file, 3594 GLuint index ) 3595{ 3596 switch( file ) { 3597 case PROGRAM_UNDEFINED: 3598 return ureg_dst_undef(); 3599 3600 case PROGRAM_TEMPORARY: 3601 if (ureg_dst_is_undef(t->temps[index])) 3602 t->temps[index] = ureg_DECL_temporary( t->ureg ); 3603 3604 return t->temps[index]; 3605 3606 case PROGRAM_OUTPUT: 3607 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 3608 t->prevInstWrotePointSize = GL_TRUE; 3609 3610 if (t->procType == TGSI_PROCESSOR_VERTEX) 3611 assert(index < VERT_RESULT_MAX); 3612 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 3613 assert(index < FRAG_RESULT_MAX); 3614 else 3615 assert(index < GEOM_RESULT_MAX); 3616 3617 assert(t->outputMapping[index] < Elements(t->outputs)); 3618 3619 return t->outputs[t->outputMapping[index]]; 3620 3621 case PROGRAM_ADDRESS: 3622 return t->address[index]; 3623 3624 default: 3625 debug_assert( 0 ); 3626 return ureg_dst_undef(); 3627 } 3628} 3629 3630/** 3631 * Map a Mesa src register to a TGSI ureg_src register. 3632 */ 3633static struct ureg_src 3634src_register( struct st_translate *t, 3635 gl_register_file file, 3636 GLuint index ) 3637{ 3638 switch( file ) { 3639 case PROGRAM_UNDEFINED: 3640 return ureg_src_undef(); 3641 3642 case PROGRAM_TEMPORARY: 3643 assert(index >= 0); 3644 assert(index < Elements(t->temps)); 3645 if (ureg_dst_is_undef(t->temps[index])) 3646 t->temps[index] = ureg_DECL_temporary( t->ureg ); 3647 return ureg_src(t->temps[index]); 3648 3649 case PROGRAM_NAMED_PARAM: 3650 case PROGRAM_ENV_PARAM: 3651 case PROGRAM_LOCAL_PARAM: 3652 case PROGRAM_UNIFORM: 3653 assert(index >= 0); 3654 return t->constants[index]; 3655 case PROGRAM_STATE_VAR: 3656 case PROGRAM_CONSTANT: /* ie, immediate */ 3657 if (index < 0) 3658 return ureg_DECL_constant( t->ureg, 0 ); 3659 else 3660 return t->constants[index]; 3661 3662 case PROGRAM_INPUT: 3663 assert(t->inputMapping[index] < Elements(t->inputs)); 3664 return t->inputs[t->inputMapping[index]]; 3665 3666 case PROGRAM_OUTPUT: 3667 assert(t->outputMapping[index] < Elements(t->outputs)); 3668 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 3669 3670 case PROGRAM_ADDRESS: 3671 return ureg_src(t->address[index]); 3672 3673 case PROGRAM_SYSTEM_VALUE: 3674 assert(index < Elements(t->systemValues)); 3675 return t->systemValues[index]; 3676 3677 default: 3678 debug_assert( 0 ); 3679 return ureg_src_undef(); 3680 } 3681} 3682 3683/** 3684 * Create a TGSI ureg_dst register from an st_dst_reg. 3685 */ 3686static struct ureg_dst 3687translate_dst( struct st_translate *t, 3688 const st_dst_reg *dst_reg, 3689 boolean saturate ) 3690{ 3691 struct ureg_dst dst = dst_register( t, 3692 dst_reg->file, 3693 dst_reg->index ); 3694 3695 dst = ureg_writemask( dst, 3696 dst_reg->writemask ); 3697 3698 if (saturate) 3699 dst = ureg_saturate( dst ); 3700 3701 if (dst_reg->reladdr != NULL) 3702 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 3703 3704 return dst; 3705} 3706 3707/** 3708 * Create a TGSI ureg_src register from an st_src_reg. 3709 */ 3710static struct ureg_src 3711translate_src( struct st_translate *t, 3712 const st_src_reg *src_reg ) 3713{ 3714 struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); 3715 3716 src = ureg_swizzle( src, 3717 GET_SWZ( src_reg->swizzle, 0 ) & 0x3, 3718 GET_SWZ( src_reg->swizzle, 1 ) & 0x3, 3719 GET_SWZ( src_reg->swizzle, 2 ) & 0x3, 3720 GET_SWZ( src_reg->swizzle, 3 ) & 0x3); 3721 3722 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 3723 src = ureg_negate(src); 3724 3725 if (src_reg->reladdr != NULL) { 3726 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 3727 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 3728 * set the bit for src.Negate. So we have to do the operation manually 3729 * here to work around the compiler's problems. */ 3730 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 3731 struct ureg_src addr = ureg_src(t->address[0]); 3732 src.Indirect = 1; 3733 src.IndirectFile = addr.File; 3734 src.IndirectIndex = addr.Index; 3735 src.IndirectSwizzle = addr.SwizzleX; 3736 3737 if (src_reg->file != PROGRAM_INPUT && 3738 src_reg->file != PROGRAM_OUTPUT) { 3739 /* If src_reg->index was negative, it was set to zero in 3740 * src_register(). Reassign it now. But don't do this 3741 * for input/output regs since they get remapped while 3742 * const buffers don't. 3743 */ 3744 src.Index = src_reg->index; 3745 } 3746 } 3747 3748 return src; 3749} 3750 3751static void 3752compile_tgsi_instruction(struct st_translate *t, 3753 const struct glsl_to_tgsi_instruction *inst) 3754{ 3755 struct ureg_program *ureg = t->ureg; 3756 GLuint i; 3757 struct ureg_dst dst[1]; 3758 struct ureg_src src[4]; 3759 unsigned num_dst; 3760 unsigned num_src; 3761 3762 num_dst = num_inst_dst_regs( inst->op ); 3763 num_src = num_inst_src_regs( inst->op ); 3764 3765 if (num_dst) 3766 dst[0] = translate_dst( t, 3767 &inst->dst, 3768 inst->saturate); 3769 3770 for (i = 0; i < num_src; i++) 3771 src[i] = translate_src( t, &inst->src[i] ); 3772 3773 switch( inst->op ) { 3774 case TGSI_OPCODE_BGNLOOP: 3775 case TGSI_OPCODE_CAL: 3776 case TGSI_OPCODE_ELSE: 3777 case TGSI_OPCODE_ENDLOOP: 3778 case TGSI_OPCODE_IF: 3779 debug_assert(num_dst == 0); 3780 ureg_label_insn( ureg, 3781 inst->op, 3782 src, num_src, 3783 get_label( t, 3784 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 )); 3785 return; 3786 3787 case TGSI_OPCODE_TEX: 3788 case TGSI_OPCODE_TXB: 3789 case TGSI_OPCODE_TXD: 3790 case TGSI_OPCODE_TXL: 3791 case TGSI_OPCODE_TXP: 3792 src[num_src++] = t->samplers[inst->sampler]; 3793 ureg_tex_insn( ureg, 3794 inst->op, 3795 dst, num_dst, 3796 translate_texture_target( inst->tex_target, 3797 inst->tex_shadow ), 3798 src, num_src ); 3799 return; 3800 3801 case TGSI_OPCODE_SCS: 3802 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 3803 ureg_insn( ureg, 3804 inst->op, 3805 dst, num_dst, 3806 src, num_src ); 3807 break; 3808 3809 default: 3810 ureg_insn( ureg, 3811 inst->op, 3812 dst, num_dst, 3813 src, num_src ); 3814 break; 3815 } 3816} 3817 3818/** 3819 * Emit the TGSI instructions to adjust the WPOS pixel center convention 3820 * Basically, add (adjX, adjY) to the fragment position. 3821 */ 3822static void 3823emit_adjusted_wpos( struct st_translate *t, 3824 const struct gl_program *program, 3825 GLfloat adjX, GLfloat adjY) 3826{ 3827 struct ureg_program *ureg = t->ureg; 3828 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 3829 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 3830 3831 /* Note that we bias X and Y and pass Z and W through unchanged. 3832 * The shader might also use gl_FragCoord.w and .z. 3833 */ 3834 ureg_ADD(ureg, wpos_temp, wpos_input, 3835 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); 3836 3837 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 3838} 3839 3840 3841/** 3842 * Emit the TGSI instructions for inverting the WPOS y coordinate. 3843 * This code is unavoidable because it also depends on whether 3844 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 3845 */ 3846static void 3847emit_wpos_inversion( struct st_translate *t, 3848 const struct gl_program *program, 3849 boolean invert) 3850{ 3851 struct ureg_program *ureg = t->ureg; 3852 3853 /* Fragment program uses fragment position input. 3854 * Need to replace instances of INPUT[WPOS] with temp T 3855 * where T = INPUT[WPOS] by y is inverted. 3856 */ 3857 static const gl_state_index wposTransformState[STATE_LENGTH] 3858 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 3859 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 3860 3861 /* XXX: note we are modifying the incoming shader here! Need to 3862 * do this before emitting the constant decls below, or this 3863 * will be missed: 3864 */ 3865 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 3866 wposTransformState); 3867 3868 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 3869 struct ureg_dst wpos_temp; 3870 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 3871 3872 /* MOV wpos_temp, input[wpos] 3873 */ 3874 if (wpos_input.File == TGSI_FILE_TEMPORARY) 3875 wpos_temp = ureg_dst(wpos_input); 3876 else { 3877 wpos_temp = ureg_DECL_temporary( ureg ); 3878 ureg_MOV( ureg, wpos_temp, wpos_input ); 3879 } 3880 3881 if (invert) { 3882 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 3883 */ 3884 ureg_MAD( ureg, 3885 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 3886 wpos_input, 3887 ureg_scalar(wpostrans, 0), 3888 ureg_scalar(wpostrans, 1)); 3889 } else { 3890 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 3891 */ 3892 ureg_MAD( ureg, 3893 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 3894 wpos_input, 3895 ureg_scalar(wpostrans, 2), 3896 ureg_scalar(wpostrans, 3)); 3897 } 3898 3899 /* Use wpos_temp as position input from here on: 3900 */ 3901 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 3902} 3903 3904 3905/** 3906 * Emit fragment position/ooordinate code. 3907 */ 3908static void 3909emit_wpos(struct st_context *st, 3910 struct st_translate *t, 3911 const struct gl_program *program, 3912 struct ureg_program *ureg) 3913{ 3914 const struct gl_fragment_program *fp = 3915 (const struct gl_fragment_program *) program; 3916 struct pipe_screen *pscreen = st->pipe->screen; 3917 boolean invert = FALSE; 3918 3919 if (fp->OriginUpperLeft) { 3920 /* Fragment shader wants origin in upper-left */ 3921 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 3922 /* the driver supports upper-left origin */ 3923 } 3924 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 3925 /* the driver supports lower-left origin, need to invert Y */ 3926 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 3927 invert = TRUE; 3928 } 3929 else 3930 assert(0); 3931 } 3932 else { 3933 /* Fragment shader wants origin in lower-left */ 3934 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 3935 /* the driver supports lower-left origin */ 3936 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 3937 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 3938 /* the driver supports upper-left origin, need to invert Y */ 3939 invert = TRUE; 3940 else 3941 assert(0); 3942 } 3943 3944 if (fp->PixelCenterInteger) { 3945 /* Fragment shader wants pixel center integer */ 3946 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 3947 /* the driver supports pixel center integer */ 3948 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3949 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 3950 /* the driver supports pixel center half integer, need to bias X,Y */ 3951 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); 3952 else 3953 assert(0); 3954 } 3955 else { 3956 /* Fragment shader wants pixel center half integer */ 3957 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 3958 /* the driver supports pixel center half integer */ 3959 } 3960 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 3961 /* the driver supports pixel center integer, need to bias X,Y */ 3962 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3963 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); 3964 } 3965 else 3966 assert(0); 3967 } 3968 3969 /* we invert after adjustment so that we avoid the MOV to temporary, 3970 * and reuse the adjustment ADD instead */ 3971 emit_wpos_inversion(t, program, invert); 3972} 3973 3974/** 3975 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 3976 * TGSI uses +1 for front, -1 for back. 3977 * This function converts the TGSI value to the GL value. Simply clamping/ 3978 * saturating the value to [0,1] does the job. 3979 */ 3980static void 3981emit_face_var(struct st_translate *t) 3982{ 3983 struct ureg_program *ureg = t->ureg; 3984 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 3985 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 3986 3987 /* MOV_SAT face_temp, input[face] */ 3988 face_temp = ureg_saturate(face_temp); 3989 ureg_MOV(ureg, face_temp, face_input); 3990 3991 /* Use face_temp as face input from here on: */ 3992 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 3993} 3994 3995static void 3996emit_edgeflags(struct st_translate *t) 3997{ 3998 struct ureg_program *ureg = t->ureg; 3999 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4000 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4001 4002 ureg_MOV(ureg, edge_dst, edge_src); 4003} 4004 4005/** 4006 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4007 * \param program the program to translate 4008 * \param numInputs number of input registers used 4009 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4010 * input indexes 4011 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4012 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4013 * each input 4014 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4015 * \param numOutputs number of output registers used 4016 * \param outputMapping maps Mesa fragment program outputs to TGSI 4017 * generic outputs 4018 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4019 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4020 * each output 4021 * 4022 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4023 */ 4024extern "C" enum pipe_error 4025st_translate_program( 4026 struct gl_context *ctx, 4027 uint procType, 4028 struct ureg_program *ureg, 4029 glsl_to_tgsi_visitor *program, 4030 const struct gl_program *proginfo, 4031 GLuint numInputs, 4032 const GLuint inputMapping[], 4033 const ubyte inputSemanticName[], 4034 const ubyte inputSemanticIndex[], 4035 const GLuint interpMode[], 4036 GLuint numOutputs, 4037 const GLuint outputMapping[], 4038 const ubyte outputSemanticName[], 4039 const ubyte outputSemanticIndex[], 4040 boolean passthrough_edgeflags ) 4041{ 4042 struct st_translate translate, *t; 4043 unsigned i; 4044 enum pipe_error ret = PIPE_OK; 4045 4046 assert(numInputs <= Elements(t->inputs)); 4047 assert(numOutputs <= Elements(t->outputs)); 4048 4049 t = &translate; 4050 memset(t, 0, sizeof *t); 4051 4052 t->procType = procType; 4053 t->inputMapping = inputMapping; 4054 t->outputMapping = outputMapping; 4055 t->ureg = ureg; 4056 t->pointSizeOutIndex = -1; 4057 t->prevInstWrotePointSize = GL_FALSE; 4058 4059 /* 4060 * Declare input attributes. 4061 */ 4062 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4063 for (i = 0; i < numInputs; i++) { 4064 t->inputs[i] = ureg_DECL_fs_input(ureg, 4065 inputSemanticName[i], 4066 inputSemanticIndex[i], 4067 interpMode[i]); 4068 } 4069 4070 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4071 /* Must do this after setting up t->inputs, and before 4072 * emitting constant references, below: 4073 */ 4074 emit_wpos(st_context(ctx), t, proginfo, ureg); 4075 } 4076 4077 if (proginfo->InputsRead & FRAG_BIT_FACE) 4078 emit_face_var(t); 4079 4080 /* 4081 * Declare output attributes. 4082 */ 4083 for (i = 0; i < numOutputs; i++) { 4084 switch (outputSemanticName[i]) { 4085 case TGSI_SEMANTIC_POSITION: 4086 t->outputs[i] = ureg_DECL_output( ureg, 4087 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 4088 outputSemanticIndex[i] ); 4089 4090 t->outputs[i] = ureg_writemask( t->outputs[i], 4091 TGSI_WRITEMASK_Z ); 4092 break; 4093 case TGSI_SEMANTIC_STENCIL: 4094 t->outputs[i] = ureg_DECL_output( ureg, 4095 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4096 outputSemanticIndex[i] ); 4097 t->outputs[i] = ureg_writemask( t->outputs[i], 4098 TGSI_WRITEMASK_Y ); 4099 break; 4100 case TGSI_SEMANTIC_COLOR: 4101 t->outputs[i] = ureg_DECL_output( ureg, 4102 TGSI_SEMANTIC_COLOR, 4103 outputSemanticIndex[i] ); 4104 break; 4105 default: 4106 debug_assert(0); 4107 return PIPE_ERROR_BAD_INPUT; 4108 } 4109 } 4110 } 4111 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4112 for (i = 0; i < numInputs; i++) { 4113 t->inputs[i] = ureg_DECL_gs_input(ureg, 4114 i, 4115 inputSemanticName[i], 4116 inputSemanticIndex[i]); 4117 } 4118 4119 for (i = 0; i < numOutputs; i++) { 4120 t->outputs[i] = ureg_DECL_output( ureg, 4121 outputSemanticName[i], 4122 outputSemanticIndex[i] ); 4123 } 4124 } 4125 else { 4126 assert(procType == TGSI_PROCESSOR_VERTEX); 4127 4128 for (i = 0; i < numInputs; i++) { 4129 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4130 } 4131 4132 for (i = 0; i < numOutputs; i++) { 4133 t->outputs[i] = ureg_DECL_output( ureg, 4134 outputSemanticName[i], 4135 outputSemanticIndex[i] ); 4136 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 4137 /* Writing to the point size result register requires special 4138 * handling to implement clamping. 4139 */ 4140 static const gl_state_index pointSizeClampState[STATE_LENGTH] 4141 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4142 /* XXX: note we are modifying the incoming shader here! Need to 4143 * do this before emitting the constant decls below, or this 4144 * will be missed. 4145 */ 4146 unsigned pointSizeClampConst = 4147 _mesa_add_state_reference(proginfo->Parameters, 4148 pointSizeClampState); 4149 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 4150 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 4151 t->pointSizeResult = t->outputs[i]; 4152 t->pointSizeOutIndex = i; 4153 t->outputs[i] = psizregtemp; 4154 } 4155 } 4156 if (passthrough_edgeflags) 4157 emit_edgeflags(t); 4158 } 4159 4160 /* Declare address register. 4161 */ 4162 if (program->num_address_regs > 0) { 4163 debug_assert( program->num_address_regs == 1 ); 4164 t->address[0] = ureg_DECL_address( ureg ); 4165 } 4166 4167 /* Declare misc input registers 4168 */ 4169 { 4170 GLbitfield sysInputs = proginfo->SystemValuesRead; 4171 unsigned numSys = 0; 4172 for (i = 0; sysInputs; i++) { 4173 if (sysInputs & (1 << i)) { 4174 unsigned semName = mesa_sysval_to_semantic[i]; 4175 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4176 numSys++; 4177 sysInputs &= ~(1 << i); 4178 } 4179 } 4180 } 4181 4182 if (program->indirect_addr_temps) { 4183 /* If temps are accessed with indirect addressing, declare temporaries 4184 * in sequential order. Else, we declare them on demand elsewhere. 4185 * (Note: the number of temporaries is equal to program->next_temp) 4186 */ 4187 for (i = 0; i < (unsigned)program->next_temp; i++) { 4188 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4189 t->temps[i] = ureg_DECL_temporary( t->ureg ); 4190 } 4191 } 4192 4193 /* Emit constants and immediates. Mesa uses a single index space 4194 * for these, so we put all the translated regs in t->constants. 4195 * XXX: this entire if block depends on proginfo->Parameters from Mesa IR 4196 */ 4197 if (proginfo->Parameters) { 4198 t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); 4199 if (t->constants == NULL) { 4200 ret = PIPE_ERROR_OUT_OF_MEMORY; 4201 goto out; 4202 } 4203 4204 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4205 switch (proginfo->Parameters->Parameters[i].Type) { 4206 case PROGRAM_ENV_PARAM: 4207 case PROGRAM_LOCAL_PARAM: 4208 case PROGRAM_STATE_VAR: 4209 case PROGRAM_NAMED_PARAM: 4210 case PROGRAM_UNIFORM: 4211 t->constants[i] = ureg_DECL_constant( ureg, i ); 4212 break; 4213 4214 /* Emit immediates only when there's no indirect addressing of 4215 * the const buffer. 4216 * FIXME: Be smarter and recognize param arrays: 4217 * indirect addressing is only valid within the referenced 4218 * array. 4219 */ 4220 case PROGRAM_CONSTANT: 4221 if (program->indirect_addr_consts) 4222 t->constants[i] = ureg_DECL_constant( ureg, i ); 4223 else 4224 switch(proginfo->Parameters->Parameters[i].DataType) 4225 { 4226 case GL_FLOAT: 4227 case GL_FLOAT_VEC2: 4228 case GL_FLOAT_VEC3: 4229 case GL_FLOAT_VEC4: 4230 t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); 4231 break; 4232 case GL_INT: 4233 case GL_INT_VEC2: 4234 case GL_INT_VEC3: 4235 case GL_INT_VEC4: 4236 t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); 4237 break; 4238 case GL_UNSIGNED_INT: 4239 case GL_UNSIGNED_INT_VEC2: 4240 case GL_UNSIGNED_INT_VEC3: 4241 case GL_UNSIGNED_INT_VEC4: 4242 case GL_BOOL: 4243 case GL_BOOL_VEC2: 4244 case GL_BOOL_VEC3: 4245 case GL_BOOL_VEC4: 4246 t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); 4247 break; 4248 default: 4249 assert(!"should not get here"); 4250 } 4251 break; 4252 default: 4253 break; 4254 } 4255 } 4256 } 4257 4258 /* texture samplers */ 4259 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4260 if (program->samplers_used & (1 << i)) { 4261 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 4262 } 4263 } 4264 4265 /* Emit each instruction in turn: 4266 */ 4267 foreach_iter(exec_list_iterator, iter, program->instructions) { 4268 set_insn_start( t, ureg_get_instruction_number( ureg )); 4269 compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); 4270 4271 if (t->prevInstWrotePointSize && proginfo->Id) { 4272 /* The previous instruction wrote to the (fake) vertex point size 4273 * result register. Now we need to clamp that value to the min/max 4274 * point size range, putting the result into the real point size 4275 * register. 4276 * Note that we can't do this easily at the end of program due to 4277 * possible early return. 4278 */ 4279 set_insn_start( t, ureg_get_instruction_number( ureg )); 4280 ureg_MAX( t->ureg, 4281 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 4282 ureg_src(t->outputs[t->pointSizeOutIndex]), 4283 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 4284 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 4285 ureg_src(t->outputs[t->pointSizeOutIndex]), 4286 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 4287 } 4288 t->prevInstWrotePointSize = GL_FALSE; 4289 } 4290 4291 /* Fix up all emitted labels: 4292 */ 4293 for (i = 0; i < t->labels_count; i++) { 4294 ureg_fixup_label( ureg, 4295 t->labels[i].token, 4296 t->insn[t->labels[i].branch_target] ); 4297 } 4298 4299out: 4300 FREE(t->insn); 4301 FREE(t->labels); 4302 FREE(t->constants); 4303 4304 if (t->error) { 4305 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4306 } 4307 4308 return ret; 4309} 4310/* ----------------------------- End TGSI code ------------------------------ */ 4311 4312/** 4313 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4314 * generating Mesa IR. 4315 */ 4316static struct gl_program * 4317get_mesa_program(struct gl_context *ctx, 4318 struct gl_shader_program *shader_program, 4319 struct gl_shader *shader) 4320{ 4321 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4322 struct gl_program *prog; 4323 GLenum target; 4324 const char *target_string; 4325 GLboolean progress; 4326 struct gl_shader_compiler_options *options = 4327 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4328 4329 switch (shader->Type) { 4330 case GL_VERTEX_SHADER: 4331 target = GL_VERTEX_PROGRAM_ARB; 4332 target_string = "vertex"; 4333 break; 4334 case GL_FRAGMENT_SHADER: 4335 target = GL_FRAGMENT_PROGRAM_ARB; 4336 target_string = "fragment"; 4337 break; 4338 case GL_GEOMETRY_SHADER: 4339 target = GL_GEOMETRY_PROGRAM_NV; 4340 target_string = "geometry"; 4341 break; 4342 default: 4343 assert(!"should not be reached"); 4344 return NULL; 4345 } 4346 4347 validate_ir_tree(shader->ir); 4348 4349 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4350 if (!prog) 4351 return NULL; 4352 prog->Parameters = _mesa_new_parameter_list(); 4353 prog->Varying = _mesa_new_parameter_list(); 4354 prog->Attributes = _mesa_new_parameter_list(); 4355 v->ctx = ctx; 4356 v->prog = prog; 4357 v->shader_program = shader_program; 4358 v->options = options; 4359 v->glsl_version = ctx->Const.GLSLVersion; 4360 4361 add_uniforms_to_parameters_list(shader_program, shader, prog); 4362 4363 /* Emit intermediate IR for main(). */ 4364 visit_exec_list(shader->ir, v); 4365 4366 /* Now emit bodies for any functions that were used. */ 4367 do { 4368 progress = GL_FALSE; 4369 4370 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4371 function_entry *entry = (function_entry *)iter.get(); 4372 4373 if (!entry->bgn_inst) { 4374 v->current_function = entry; 4375 4376 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4377 entry->bgn_inst->function = entry; 4378 4379 visit_exec_list(&entry->sig->body, v); 4380 4381 glsl_to_tgsi_instruction *last; 4382 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4383 if (last->op != TGSI_OPCODE_RET) 4384 v->emit(NULL, TGSI_OPCODE_RET); 4385 4386 glsl_to_tgsi_instruction *end; 4387 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4388 end->function = entry; 4389 4390 progress = GL_TRUE; 4391 } 4392 } 4393 } while (progress); 4394 4395#if 0 4396 /* Print out some information (for debugging purposes) used by the 4397 * optimization passes. */ 4398 for (i=0; i < v->next_temp; i++) { 4399 int fr = v->get_first_temp_read(i); 4400 int fw = v->get_first_temp_write(i); 4401 int lr = v->get_last_temp_read(i); 4402 int lw = v->get_last_temp_write(i); 4403 4404 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4405 assert(fw <= fr); 4406 } 4407#endif 4408 4409 /* Remove reads to output registers, and to varyings in vertex shaders. */ 4410 v->remove_output_reads(PROGRAM_OUTPUT); 4411 if (target == GL_VERTEX_PROGRAM_ARB) 4412 v->remove_output_reads(PROGRAM_VARYING); 4413 4414 /* Perform the simplify_cmp optimization, which is required by r300g. */ 4415 v->simplify_cmp(); 4416 4417 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 4418 * FIXME: These passes to optimize temporary registers don't work when there 4419 * is indirect addressing of the temporary register space. We need proper 4420 * array support so that we don't have to give up these passes in every 4421 * shader that uses arrays. 4422 */ 4423 if (!v->indirect_addr_temps) { 4424 v->copy_propagate(); 4425 while (v->eliminate_dead_code_advanced()); 4426 v->eliminate_dead_code(); 4427 v->merge_registers(); 4428 v->renumber_registers(); 4429 } 4430 4431 /* Write the END instruction. */ 4432 v->emit(NULL, TGSI_OPCODE_END); 4433 4434 if (ctx->Shader.Flags & GLSL_DUMP) { 4435 printf("\n"); 4436 printf("GLSL IR for linked %s program %d:\n", target_string, 4437 shader_program->Name); 4438 _mesa_print_ir(shader->ir, NULL); 4439 printf("\n"); 4440 printf("\n"); 4441 } 4442 4443 prog->Instructions = NULL; 4444 prog->NumInstructions = 0; 4445 4446 do_set_program_inouts(shader->ir, prog); 4447 count_resources(v, prog); 4448 4449 check_resources(ctx, shader_program, v, prog); 4450 4451 _mesa_reference_program(ctx, &shader->Program, prog); 4452 4453 struct st_vertex_program *stvp; 4454 struct st_fragment_program *stfp; 4455 struct st_geometry_program *stgp; 4456 4457 switch (shader->Type) { 4458 case GL_VERTEX_SHADER: 4459 stvp = (struct st_vertex_program *)prog; 4460 stvp->glsl_to_tgsi = v; 4461 break; 4462 case GL_FRAGMENT_SHADER: 4463 stfp = (struct st_fragment_program *)prog; 4464 stfp->glsl_to_tgsi = v; 4465 break; 4466 case GL_GEOMETRY_SHADER: 4467 stgp = (struct st_geometry_program *)prog; 4468 stgp->glsl_to_tgsi = v; 4469 break; 4470 default: 4471 assert(!"should not be reached"); 4472 return NULL; 4473 } 4474 4475 return prog; 4476} 4477 4478extern "C" { 4479 4480struct gl_shader * 4481st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4482{ 4483 struct gl_shader *shader; 4484 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4485 type == GL_GEOMETRY_SHADER_ARB); 4486 shader = rzalloc(NULL, struct gl_shader); 4487 if (shader) { 4488 shader->Type = type; 4489 shader->Name = name; 4490 _mesa_init_shader(ctx, shader); 4491 } 4492 return shader; 4493} 4494 4495struct gl_shader_program * 4496st_new_shader_program(struct gl_context *ctx, GLuint name) 4497{ 4498 struct gl_shader_program *shProg; 4499 shProg = rzalloc(NULL, struct gl_shader_program); 4500 if (shProg) { 4501 shProg->Name = name; 4502 _mesa_init_shader_program(ctx, shProg); 4503 } 4504 return shProg; 4505} 4506 4507/** 4508 * Link a shader. 4509 * Called via ctx->Driver.LinkShader() 4510 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 4511 * with code lowering and other optimizations. 4512 */ 4513GLboolean 4514st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4515{ 4516 assert(prog->LinkStatus); 4517 4518 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4519 if (prog->_LinkedShaders[i] == NULL) 4520 continue; 4521 4522 bool progress; 4523 exec_list *ir = prog->_LinkedShaders[i]->ir; 4524 const struct gl_shader_compiler_options *options = 4525 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 4526 4527 do { 4528 progress = false; 4529 4530 /* Lowering */ 4531 do_mat_op_to_vec(ir); 4532 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 4533 | LOG_TO_LOG2 4534 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 4535 4536 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 4537 4538 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; 4539 4540 progress = lower_quadop_vector(ir, true) || progress; 4541 4542 if (options->EmitNoIfs) { 4543 progress = lower_discard(ir) || progress; 4544 progress = lower_if_to_cond_assign(ir) || progress; 4545 } 4546 4547 if (options->EmitNoNoise) 4548 progress = lower_noise(ir) || progress; 4549 4550 /* If there are forms of indirect addressing that the driver 4551 * cannot handle, perform the lowering pass. 4552 */ 4553 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 4554 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 4555 progress = 4556 lower_variable_index_to_cond_assign(ir, 4557 options->EmitNoIndirectInput, 4558 options->EmitNoIndirectOutput, 4559 options->EmitNoIndirectTemp, 4560 options->EmitNoIndirectUniform) 4561 || progress; 4562 4563 progress = do_vec_index_to_cond_assign(ir) || progress; 4564 } while (progress); 4565 4566 validate_ir_tree(ir); 4567 } 4568 4569 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4570 struct gl_program *linked_prog; 4571 4572 if (prog->_LinkedShaders[i] == NULL) 4573 continue; 4574 4575 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 4576 4577 if (linked_prog) { 4578 bool ok = true; 4579 4580 switch (prog->_LinkedShaders[i]->Type) { 4581 case GL_VERTEX_SHADER: 4582 _mesa_reference_vertprog(ctx, &prog->VertexProgram, 4583 (struct gl_vertex_program *)linked_prog); 4584 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, 4585 linked_prog); 4586 break; 4587 case GL_FRAGMENT_SHADER: 4588 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, 4589 (struct gl_fragment_program *)linked_prog); 4590 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, 4591 linked_prog); 4592 break; 4593 case GL_GEOMETRY_SHADER: 4594 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, 4595 (struct gl_geometry_program *)linked_prog); 4596 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, 4597 linked_prog); 4598 break; 4599 } 4600 if (!ok) { 4601 return GL_FALSE; 4602 } 4603 } 4604 4605 _mesa_reference_program(ctx, &linked_prog, NULL); 4606 } 4607 4608 return GL_TRUE; 4609} 4610 4611 4612/** 4613 * Link a GLSL shader program. Called via glLinkProgram(). 4614 */ 4615void 4616st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4617{ 4618 unsigned int i; 4619 4620 _mesa_clear_shader_program_data(ctx, prog); 4621 4622 prog->LinkStatus = GL_TRUE; 4623 4624 for (i = 0; i < prog->NumShaders; i++) { 4625 if (!prog->Shaders[i]->CompileStatus) { 4626 fail_link(prog, "linking with uncompiled shader"); 4627 prog->LinkStatus = GL_FALSE; 4628 } 4629 } 4630 4631 prog->Varying = _mesa_new_parameter_list(); 4632 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); 4633 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); 4634 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); 4635 4636 if (prog->LinkStatus) { 4637 link_shaders(ctx, prog); 4638 } 4639 4640 if (prog->LinkStatus) { 4641 if (!ctx->Driver.LinkShader(ctx, prog)) { 4642 prog->LinkStatus = GL_FALSE; 4643 } 4644 } 4645 4646 set_uniform_initializers(ctx, prog); 4647 4648 if (ctx->Shader.Flags & GLSL_DUMP) { 4649 if (!prog->LinkStatus) { 4650 printf("GLSL shader program %d failed to link\n", prog->Name); 4651 } 4652 4653 if (prog->InfoLog && prog->InfoLog[0] != 0) { 4654 printf("GLSL shader program %d info log:\n", prog->Name); 4655 printf("%s\n", prog->InfoLog); 4656 } 4657 } 4658} 4659 4660} /* extern "C" */ 4661