st_glsl_to_tgsi.cpp revision 5768ed6429937940bd48f5de4f8383273952880a
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to Mesa's gl_program representation and to TGSI. 
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45extern "C" { 46#include "main/mtypes.h" 47#include "main/shaderapi.h" 48#include "main/shaderobj.h" 49#include "main/uniforms.h" 50#include "program/hash_table.h" 51#include "program/prog_instruction.h" 52#include "program/prog_optimize.h" 53#include "program/prog_print.h" 54#include "program/program.h" 55#include "program/prog_uniform.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_dump.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71 72#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 73 (1 << PROGRAM_ENV_PARAM) | \ 74 (1 << PROGRAM_STATE_VAR) | \ 75 (1 << PROGRAM_NAMED_PARAM) | \ 76 (1 << PROGRAM_CONSTANT) | \ 77 (1 << PROGRAM_UNIFORM)) 78} 79 80class st_src_reg; 81class st_dst_reg; 82 83static int swizzle_for_size(int size); 84 85/** 86 * This struct is a corresponding struct to Mesa prog_src_register, with 87 * wider fields. 
88 */ 89class st_src_reg { 90public: 91 st_src_reg(gl_register_file file, int index, const glsl_type *type) 92 { 93 this->file = file; 94 this->index = index; 95 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 96 this->swizzle = swizzle_for_size(type->vector_elements); 97 else 98 this->swizzle = SWIZZLE_XYZW; 99 this->negate = 0; 100 this->reladdr = NULL; 101 } 102 103 st_src_reg(gl_register_file file, int index) 104 { 105 this->file = file; 106 this->index = index; 107 this->swizzle = SWIZZLE_XYZW; 108 this->negate = 0; 109 this->reladdr = NULL; 110 } 111 112 st_src_reg() 113 { 114 this->file = PROGRAM_UNDEFINED; 115 this->index = 0; 116 this->swizzle = 0; 117 this->negate = 0; 118 this->reladdr = NULL; 119 } 120 121 explicit st_src_reg(st_dst_reg reg); 122 123 gl_register_file file; /**< PROGRAM_* from Mesa */ 124 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 125 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 126 int negate; /**< NEGATE_XYZW mask from mesa */ 127 /** Register index should be offset by the integer in this reg. */ 128 st_src_reg *reladdr; 129}; 130 131class st_dst_reg { 132public: 133 st_dst_reg(gl_register_file file, int writemask) 134 { 135 this->file = file; 136 this->index = 0; 137 this->writemask = writemask; 138 this->cond_mask = COND_TR; 139 this->reladdr = NULL; 140 } 141 142 st_dst_reg() 143 { 144 this->file = PROGRAM_UNDEFINED; 145 this->index = 0; 146 this->writemask = 0; 147 this->cond_mask = COND_TR; 148 this->reladdr = NULL; 149 } 150 151 explicit st_dst_reg(st_src_reg reg); 152 153 gl_register_file file; /**< PROGRAM_* from Mesa */ 154 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 155 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 156 GLuint cond_mask:4; 157 /** Register index should be offset by the integer in this reg. 
*/ 158 st_src_reg *reladdr; 159}; 160 161st_src_reg::st_src_reg(st_dst_reg reg) 162{ 163 this->file = reg.file; 164 this->index = reg.index; 165 this->swizzle = SWIZZLE_XYZW; 166 this->negate = 0; 167 this->reladdr = NULL; 168} 169 170st_dst_reg::st_dst_reg(st_src_reg reg) 171{ 172 this->file = reg.file; 173 this->index = reg.index; 174 this->writemask = WRITEMASK_XYZW; 175 this->cond_mask = COND_TR; 176 this->reladdr = reg.reladdr; 177} 178 179class glsl_to_tgsi_instruction : public exec_node { 180public: 181 /* Callers of this ralloc-based new need not call delete. It's 182 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 183 static void* operator new(size_t size, void *ctx) 184 { 185 void *node; 186 187 node = rzalloc_size(ctx, size); 188 assert(node != NULL); 189 190 return node; 191 } 192 193 enum prog_opcode op; 194 st_dst_reg dst; 195 st_src_reg src[3]; 196 /** Pointer to the ir source this tree came from for debugging */ 197 ir_instruction *ir; 198 GLboolean cond_update; 199 bool saturate; 200 int sampler; /**< sampler index */ 201 int tex_target; /**< One of TEXTURE_*_INDEX */ 202 GLboolean tex_shadow; 203 204 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ 205}; 206 207class variable_storage : public exec_node { 208public: 209 variable_storage(ir_variable *var, gl_register_file file, int index) 210 : file(file), index(index), var(var) 211 { 212 /* empty */ 213 } 214 215 gl_register_file file; 216 int index; 217 ir_variable *var; /* variable that maps to this, if any */ 218}; 219 220class function_entry : public exec_node { 221public: 222 ir_function_signature *sig; 223 224 /** 225 * identifier of this function signature used by the program. 226 * 227 * At the point that Mesa instructions for function calls are 228 * generated, we don't know the address of the first instruction of 229 * the function body. So we make the BranchTarget that is called a 230 * small integer and rewrite them during set_branchtargets(). 
231 */ 232 int sig_id; 233 234 /** 235 * Pointer to first instruction of the function body. 236 * 237 * Set during function body emits after main() is processed. 238 */ 239 glsl_to_tgsi_instruction *bgn_inst; 240 241 /** 242 * Index of the first instruction of the function body in actual 243 * Mesa IR. 244 * 245 * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. 246 */ 247 int inst; 248 249 /** Storage for the return value. */ 250 st_src_reg return_reg; 251}; 252 253class glsl_to_tgsi_visitor : public ir_visitor { 254public: 255 glsl_to_tgsi_visitor(); 256 ~glsl_to_tgsi_visitor(); 257 258 function_entry *current_function; 259 260 struct gl_context *ctx; 261 struct gl_program *prog; 262 struct gl_shader_program *shader_program; 263 struct gl_shader_compiler_options *options; 264 265 int next_temp; 266 267 int num_address_regs; 268 int samplers_used; 269 bool indirect_addr_temps; 270 bool indirect_addr_consts; 271 272 variable_storage *find_variable_storage(ir_variable *var); 273 274 function_entry *get_function_signature(ir_function_signature *sig); 275 276 st_src_reg get_temp(const glsl_type *type); 277 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 278 279 st_src_reg st_src_reg_for_float(float val); 280 281 /** 282 * \name Visit methods 283 * 284 * As typical for the visitor pattern, there must be one \c visit method for 285 * each concrete subclass of \c ir_instruction. Virtual base classes within 286 * the hierarchy should not have \c visit methods. 
287 */ 288 /*@{*/ 289 virtual void visit(ir_variable *); 290 virtual void visit(ir_loop *); 291 virtual void visit(ir_loop_jump *); 292 virtual void visit(ir_function_signature *); 293 virtual void visit(ir_function *); 294 virtual void visit(ir_expression *); 295 virtual void visit(ir_swizzle *); 296 virtual void visit(ir_dereference_variable *); 297 virtual void visit(ir_dereference_array *); 298 virtual void visit(ir_dereference_record *); 299 virtual void visit(ir_assignment *); 300 virtual void visit(ir_constant *); 301 virtual void visit(ir_call *); 302 virtual void visit(ir_return *); 303 virtual void visit(ir_discard *); 304 virtual void visit(ir_texture *); 305 virtual void visit(ir_if *); 306 /*@}*/ 307 308 st_src_reg result; 309 310 /** List of variable_storage */ 311 exec_list variables; 312 313 /** List of function_entry */ 314 exec_list function_signatures; 315 int next_signature_id; 316 317 /** List of glsl_to_tgsi_instruction */ 318 exec_list instructions; 319 320 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op); 321 322 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, 323 st_dst_reg dst, st_src_reg src0); 324 325 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, 326 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 327 328 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, 329 st_dst_reg dst, 330 st_src_reg src0, st_src_reg src1, st_src_reg src2); 331 332 /** 333 * Emit the correct dot-product instruction for the type of arguments 334 */ 335 void emit_dp(ir_instruction *ir, 336 st_dst_reg dst, 337 st_src_reg src0, 338 st_src_reg src1, 339 unsigned elements); 340 341 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 342 st_dst_reg dst, st_src_reg src0); 343 344 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 345 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 346 347 void emit_scs(ir_instruction *ir, enum prog_opcode op, 348 st_dst_reg dst, 
const st_src_reg &src); 349 350 GLboolean try_emit_mad(ir_expression *ir, 351 int mul_operand); 352 GLboolean try_emit_sat(ir_expression *ir); 353 354 void emit_swz(ir_expression *ir); 355 356 bool process_move_condition(ir_rvalue *ir); 357 358 void remove_output_reads(gl_register_file type); 359 360 void rename_temp_register(int index, int new_index); 361 int get_first_temp_read(int index); 362 int get_first_temp_write(int index); 363 int get_last_temp_read(int index); 364 int get_last_temp_write(int index); 365 366 void copy_propagate(void); 367 void eliminate_dead_code(void); 368 void merge_registers(void); 369 void renumber_registers(void); 370 371 void *mem_ctx; 372}; 373 374static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); 375 376static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); 377 378static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); 379 380static void 381fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 382 383static void 384fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
385{ 386 va_list args; 387 va_start(args, fmt); 388 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 389 va_end(args); 390 391 prog->LinkStatus = GL_FALSE; 392} 393 394static int 395swizzle_for_size(int size) 396{ 397 int size_swizzles[4] = { 398 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 399 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 400 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 401 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 402 }; 403 404 assert((size >= 1) && (size <= 4)); 405 return size_swizzles[size - 1]; 406} 407 408glsl_to_tgsi_instruction * 409glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, 410 st_dst_reg dst, 411 st_src_reg src0, st_src_reg src1, st_src_reg src2) 412{ 413 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 414 int num_reladdr = 0, i; 415 416 /* If we have to do relative addressing, we want to load the ARL 417 * reg directly for one of the regs, and preload the other reladdr 418 * sources into temps. 
419 */ 420 num_reladdr += dst.reladdr != NULL; 421 num_reladdr += src0.reladdr != NULL; 422 num_reladdr += src1.reladdr != NULL; 423 num_reladdr += src2.reladdr != NULL; 424 425 reladdr_to_temp(ir, &src2, &num_reladdr); 426 reladdr_to_temp(ir, &src1, &num_reladdr); 427 reladdr_to_temp(ir, &src0, &num_reladdr); 428 429 if (dst.reladdr) { 430 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); 431 num_reladdr--; 432 } 433 assert(num_reladdr == 0); 434 435 inst->op = op; 436 inst->dst = dst; 437 inst->src[0] = src0; 438 inst->src[1] = src1; 439 inst->src[2] = src2; 440 inst->ir = ir; 441 442 inst->function = NULL; 443 444 if (op == OPCODE_ARL) 445 this->num_address_regs = 1; 446 447 /* Update indirect addressing status used by TGSI */ 448 if (dst.reladdr) { 449 switch(dst.file) { 450 case PROGRAM_TEMPORARY: 451 this->indirect_addr_temps = true; 452 break; 453 case PROGRAM_LOCAL_PARAM: 454 case PROGRAM_ENV_PARAM: 455 case PROGRAM_STATE_VAR: 456 case PROGRAM_NAMED_PARAM: 457 case PROGRAM_CONSTANT: 458 case PROGRAM_UNIFORM: 459 this->indirect_addr_consts = true; 460 break; 461 default: 462 break; 463 } 464 } 465 else { 466 for (i=0; i<3; i++) { 467 if(inst->src[i].reladdr) { 468 switch(dst.file) { 469 case PROGRAM_TEMPORARY: 470 this->indirect_addr_temps = true; 471 break; 472 case PROGRAM_LOCAL_PARAM: 473 case PROGRAM_ENV_PARAM: 474 case PROGRAM_STATE_VAR: 475 case PROGRAM_NAMED_PARAM: 476 case PROGRAM_CONSTANT: 477 case PROGRAM_UNIFORM: 478 this->indirect_addr_consts = true; 479 break; 480 default: 481 break; 482 } 483 } 484 } 485 } 486 487 this->instructions.push_tail(inst); 488 489 return inst; 490} 491 492 493glsl_to_tgsi_instruction * 494glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, 495 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 496{ 497 return emit(ir, op, dst, src0, src1, undef_src); 498} 499 500glsl_to_tgsi_instruction * 501glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, 502 st_dst_reg dst, st_src_reg src0) 503{ 504 
assert(dst.writemask != 0); 505 return emit(ir, op, dst, src0, undef_src, undef_src); 506} 507 508glsl_to_tgsi_instruction * 509glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op) 510{ 511 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 512} 513 514void 515glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 516 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 517 unsigned elements) 518{ 519 static const gl_inst_opcode dot_opcodes[] = { 520 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 521 }; 522 523 emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 524} 525 526/** 527 * Emits Mesa scalar opcodes to produce unique answers across channels. 528 * 529 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X 530 * channel determines the result across all channels. So to do a vec4 531 * of this operation, we want to emit a scalar per source channel used 532 * to produce dest channels. 533 */ 534void 535glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 536 st_dst_reg dst, 537 st_src_reg orig_src0, st_src_reg orig_src1) 538{ 539 int i, j; 540 int done_mask = ~dst.writemask; 541 542 /* Mesa RCP is a scalar operation splatting results to all channels, 543 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 544 * dst channels. 545 */ 546 for (i = 0; i < 4; i++) { 547 GLuint this_mask = (1 << i); 548 glsl_to_tgsi_instruction *inst; 549 st_src_reg src0 = orig_src0; 550 st_src_reg src1 = orig_src1; 551 552 if (done_mask & this_mask) 553 continue; 554 555 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 556 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 557 for (j = i + 1; j < 4; j++) { 558 /* If there is another enabled component in the destination that is 559 * derived from the same inputs, generate its value on this pass as 560 * well. 
561 */ 562 if (!(done_mask & (1 << j)) && 563 GET_SWZ(src0.swizzle, j) == src0_swiz && 564 GET_SWZ(src1.swizzle, j) == src1_swiz) { 565 this_mask |= (1 << j); 566 } 567 } 568 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 569 src0_swiz, src0_swiz); 570 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 571 src1_swiz, src1_swiz); 572 573 inst = emit(ir, op, dst, src0, src1); 574 inst->dst.writemask = this_mask; 575 done_mask |= this_mask; 576 } 577} 578 579void 580glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 581 st_dst_reg dst, st_src_reg src0) 582{ 583 st_src_reg undef = undef_src; 584 585 undef.swizzle = SWIZZLE_XXXX; 586 587 emit_scalar(ir, op, dst, src0, undef); 588} 589 590/** 591 * Emit an OPCODE_SCS instruction 592 * 593 * The \c SCS opcode functions a bit differently than the other Mesa (or 594 * ARB_fragment_program) opcodes. Instead of splatting its result across all 595 * four components of the destination, it writes one value to the \c x 596 * component and another value to the \c y component. 597 * 598 * \param ir IR instruction being processed 599 * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which 600 * value is desired. 601 * \param dst Destination register 602 * \param src Source register 603 */ 604void 605glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, 606 st_dst_reg dst, 607 const st_src_reg &src) 608{ 609 /* Vertex programs cannot use the SCS opcode. 610 */ 611 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 612 emit_scalar(ir, op, dst, src); 613 return; 614 } 615 616 const unsigned component = (op == OPCODE_SIN) ? 0 : 1; 617 const unsigned scs_mask = (1U << component); 618 int done_mask = ~dst.writemask; 619 st_src_reg tmp; 620 621 assert(op == OPCODE_SIN || op == OPCODE_COS); 622 623 /* If there are compnents in the destination that differ from the component 624 * that will be written by the SCS instrution, we'll need a temporary. 
625 */ 626 if (scs_mask != unsigned(dst.writemask)) { 627 tmp = get_temp(glsl_type::vec4_type); 628 } 629 630 for (unsigned i = 0; i < 4; i++) { 631 unsigned this_mask = (1U << i); 632 st_src_reg src0 = src; 633 634 if ((done_mask & this_mask) != 0) 635 continue; 636 637 /* The source swizzle specified which component of the source generates 638 * sine / cosine for the current component in the destination. The SCS 639 * instruction requires that this value be swizzle to the X component. 640 * Replace the current swizzle with a swizzle that puts the source in 641 * the X component. 642 */ 643 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 644 645 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 646 src0_swiz, src0_swiz); 647 for (unsigned j = i + 1; j < 4; j++) { 648 /* If there is another enabled component in the destination that is 649 * derived from the same inputs, generate its value on this pass as 650 * well. 651 */ 652 if (!(done_mask & (1 << j)) && 653 GET_SWZ(src0.swizzle, j) == src0_swiz) { 654 this_mask |= (1 << j); 655 } 656 } 657 658 if (this_mask != scs_mask) { 659 glsl_to_tgsi_instruction *inst; 660 st_dst_reg tmp_dst = st_dst_reg(tmp); 661 662 /* Emit the SCS instruction. 663 */ 664 inst = emit(ir, OPCODE_SCS, tmp_dst, src0); 665 inst->dst.writemask = scs_mask; 666 667 /* Move the result of the SCS instruction to the desired location in 668 * the destination. 669 */ 670 tmp.swizzle = MAKE_SWIZZLE4(component, component, 671 component, component); 672 inst = emit(ir, OPCODE_SCS, dst, tmp); 673 inst->dst.writemask = this_mask; 674 } else { 675 /* Emit the SCS instruction to write directly to the destination. 
676 */ 677 glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); 678 inst->dst.writemask = scs_mask; 679 } 680 681 done_mask |= this_mask; 682 } 683} 684 685struct st_src_reg 686glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 687{ 688 st_src_reg src(PROGRAM_CONSTANT, -1, NULL); 689 690 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 691 &val, 1, &src.swizzle); 692 693 return src; 694} 695 696static int 697type_size(const struct glsl_type *type) 698{ 699 unsigned int i; 700 int size; 701 702 switch (type->base_type) { 703 case GLSL_TYPE_UINT: 704 case GLSL_TYPE_INT: 705 case GLSL_TYPE_FLOAT: 706 case GLSL_TYPE_BOOL: 707 if (type->is_matrix()) { 708 return type->matrix_columns; 709 } else { 710 /* Regardless of size of vector, it gets a vec4. This is bad 711 * packing for things like floats, but otherwise arrays become a 712 * mess. Hopefully a later pass over the code can pack scalars 713 * down if appropriate. 714 */ 715 return 1; 716 } 717 case GLSL_TYPE_ARRAY: 718 assert(type->length > 0); 719 return type_size(type->fields.array) * type->length; 720 case GLSL_TYPE_STRUCT: 721 size = 0; 722 for (i = 0; i < type->length; i++) { 723 size += type_size(type->fields.structure[i].type); 724 } 725 return size; 726 case GLSL_TYPE_SAMPLER: 727 /* Samplers take up one slot in UNIFORMS[], but they're baked in 728 * at link time. 729 */ 730 return 1; 731 default: 732 assert(0); 733 return 0; 734 } 735} 736 737/** 738 * In the initial pass of codegen, we assign temporary numbers to 739 * intermediate results. (not SSA -- variable assignments will reuse 740 * storage). Actual register allocation for the Mesa VM occurs in a 741 * pass over the Mesa IR later. 
742 */ 743st_src_reg 744glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 745{ 746 st_src_reg src; 747 int swizzle[4]; 748 int i; 749 750 src.file = PROGRAM_TEMPORARY; 751 src.index = next_temp; 752 src.reladdr = NULL; 753 next_temp += type_size(type); 754 755 if (type->is_array() || type->is_record()) { 756 src.swizzle = SWIZZLE_NOOP; 757 } else { 758 for (i = 0; i < type->vector_elements; i++) 759 swizzle[i] = i; 760 for (; i < 4; i++) 761 swizzle[i] = type->vector_elements - 1; 762 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], 763 swizzle[2], swizzle[3]); 764 } 765 src.negate = 0; 766 767 return src; 768} 769 770variable_storage * 771glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 772{ 773 774 variable_storage *entry; 775 776 foreach_iter(exec_list_iterator, iter, this->variables) { 777 entry = (variable_storage *)iter.get(); 778 779 if (entry->var == var) 780 return entry; 781 } 782 783 return NULL; 784} 785 786void 787glsl_to_tgsi_visitor::visit(ir_variable *ir) 788{ 789 if (strcmp(ir->name, "gl_FragCoord") == 0) { 790 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 791 792 fp->OriginUpperLeft = ir->origin_upper_left; 793 fp->PixelCenterInteger = ir->pixel_center_integer; 794 795 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 796 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 797 switch (ir->depth_layout) { 798 case ir_depth_layout_none: 799 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; 800 break; 801 case ir_depth_layout_any: 802 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; 803 break; 804 case ir_depth_layout_greater: 805 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; 806 break; 807 case ir_depth_layout_less: 808 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; 809 break; 810 case ir_depth_layout_unchanged: 811 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; 812 break; 813 default: 814 assert(0); 815 break; 816 } 817 } 818 819 if (ir->mode == ir_var_uniform && 
strncmp(ir->name, "gl_", 3) == 0) { 820 unsigned int i; 821 const ir_state_slot *const slots = ir->state_slots; 822 assert(ir->state_slots != NULL); 823 824 /* Check if this statevar's setup in the STATE file exactly 825 * matches how we'll want to reference it as a 826 * struct/array/whatever. If not, then we need to move it into 827 * temporary storage and hope that it'll get copy-propagated 828 * out. 829 */ 830 for (i = 0; i < ir->num_state_slots; i++) { 831 if (slots[i].swizzle != SWIZZLE_XYZW) { 832 break; 833 } 834 } 835 836 struct variable_storage *storage; 837 st_dst_reg dst; 838 if (i == ir->num_state_slots) { 839 /* We'll set the index later. */ 840 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 841 this->variables.push_tail(storage); 842 843 dst = undef_dst; 844 } else { 845 /* The variable_storage constructor allocates slots based on the size 846 * of the type. However, this had better match the number of state 847 * elements that we're going to copy into the new temporary. 848 */ 849 assert((int) ir->num_state_slots == type_size(ir->type)); 850 851 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 852 this->next_temp); 853 this->variables.push_tail(storage); 854 this->next_temp += type_size(ir->type); 855 856 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); 857 } 858 859 860 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 861 int index = _mesa_add_state_reference(this->prog->Parameters, 862 (gl_state_index *)slots[i].tokens); 863 864 if (storage->file == PROGRAM_STATE_VAR) { 865 if (storage->index == -1) { 866 storage->index = index; 867 } else { 868 assert(index == storage->index + (int)i); 869 } 870 } else { 871 st_src_reg src(PROGRAM_STATE_VAR, index, NULL); 872 src.swizzle = slots[i].swizzle; 873 emit(ir, OPCODE_MOV, dst, src); 874 /* even a float takes up a whole vec4 reg in a struct/array. 
*/ 875 dst.index++; 876 } 877 } 878 879 if (storage->file == PROGRAM_TEMPORARY && 880 dst.index != storage->index + (int) ir->num_state_slots) { 881 fail_link(this->shader_program, 882 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 883 ir->name, dst.index - storage->index, 884 type_size(ir->type)); 885 } 886 } 887} 888 889void 890glsl_to_tgsi_visitor::visit(ir_loop *ir) 891{ 892 ir_dereference_variable *counter = NULL; 893 894 if (ir->counter != NULL) 895 counter = new(ir) ir_dereference_variable(ir->counter); 896 897 if (ir->from != NULL) { 898 assert(ir->counter != NULL); 899 900 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 901 902 a->accept(this); 903 delete a; 904 } 905 906 emit(NULL, OPCODE_BGNLOOP); 907 908 if (ir->to) { 909 ir_expression *e = 910 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 911 counter, ir->to); 912 ir_if *if_stmt = new(ir) ir_if(e); 913 914 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 915 916 if_stmt->then_instructions.push_tail(brk); 917 918 if_stmt->accept(this); 919 920 delete if_stmt; 921 delete e; 922 delete brk; 923 } 924 925 visit_exec_list(&ir->body_instructions, this); 926 927 if (ir->increment) { 928 ir_expression *e = 929 new(ir) ir_expression(ir_binop_add, counter->type, 930 counter, ir->increment); 931 932 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 933 934 a->accept(this); 935 delete a; 936 delete e; 937 } 938 939 emit(NULL, OPCODE_ENDLOOP); 940} 941 942void 943glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 944{ 945 switch (ir->mode) { 946 case ir_loop_jump::jump_break: 947 emit(NULL, OPCODE_BRK); 948 break; 949 case ir_loop_jump::jump_continue: 950 emit(NULL, OPCODE_CONT); 951 break; 952 } 953} 954 955 956void 957glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 958{ 959 assert(0); 960 (void)ir; 961} 962 963void 964glsl_to_tgsi_visitor::visit(ir_function *ir) 965{ 966 /* Ignore function bodies other than main() -- we shouldn't see calls to 
967 * them since they should all be inlined before we get to glsl_to_tgsi. 968 */ 969 if (strcmp(ir->name, "main") == 0) { 970 const ir_function_signature *sig; 971 exec_list empty; 972 973 sig = ir->matching_signature(&empty); 974 975 assert(sig); 976 977 foreach_iter(exec_list_iterator, iter, sig->body) { 978 ir_instruction *ir = (ir_instruction *)iter.get(); 979 980 ir->accept(this); 981 } 982 } 983} 984 985GLboolean 986glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 987{ 988 int nonmul_operand = 1 - mul_operand; 989 st_src_reg a, b, c; 990 991 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 992 if (!expr || expr->operation != ir_binop_mul) 993 return false; 994 995 expr->operands[0]->accept(this); 996 a = this->result; 997 expr->operands[1]->accept(this); 998 b = this->result; 999 ir->operands[nonmul_operand]->accept(this); 1000 c = this->result; 1001 1002 this->result = get_temp(ir->type); 1003 emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c); 1004 1005 return true; 1006} 1007 1008GLboolean 1009glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1010{ 1011 /* Saturates were only introduced to vertex programs in 1012 * NV_vertex_program3, so don't give them to drivers in the VP. 
1013 */ 1014 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1015 return false; 1016 1017 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1018 if (!sat_src) 1019 return false; 1020 1021 sat_src->accept(this); 1022 st_src_reg src = this->result; 1023 1024 this->result = get_temp(ir->type); 1025 glsl_to_tgsi_instruction *inst; 1026 inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src); 1027 inst->saturate = true; 1028 1029 return true; 1030} 1031 1032void 1033glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1034 st_src_reg *reg, int *num_reladdr) 1035{ 1036 if (!reg->reladdr) 1037 return; 1038 1039 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); 1040 1041 if (*num_reladdr != 1) { 1042 st_src_reg temp = get_temp(glsl_type::vec4_type); 1043 1044 emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg); 1045 *reg = temp; 1046 } 1047 1048 (*num_reladdr)--; 1049} 1050 1051void 1052glsl_to_tgsi_visitor::emit_swz(ir_expression *ir) 1053{ 1054 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 1055 * This means that each of the operands is either an immediate value of -1, 1056 * 0, or 1, or is a component from one source register (possibly with 1057 * negation). 
1058 */ 1059 uint8_t components[4] = { 0 }; 1060 bool negate[4] = { false }; 1061 ir_variable *var = NULL; 1062 1063 for (unsigned i = 0; i < ir->type->vector_elements; i++) { 1064 ir_rvalue *op = ir->operands[i]; 1065 1066 assert(op->type->is_scalar()); 1067 1068 while (op != NULL) { 1069 switch (op->ir_type) { 1070 case ir_type_constant: { 1071 1072 assert(op->type->is_scalar()); 1073 1074 const ir_constant *const c = op->as_constant(); 1075 if (c->is_one()) { 1076 components[i] = SWIZZLE_ONE; 1077 } else if (c->is_zero()) { 1078 components[i] = SWIZZLE_ZERO; 1079 } else if (c->is_negative_one()) { 1080 components[i] = SWIZZLE_ONE; 1081 negate[i] = true; 1082 } else { 1083 assert(!"SWZ constant must be 0.0 or 1.0."); 1084 } 1085 1086 op = NULL; 1087 break; 1088 } 1089 1090 case ir_type_dereference_variable: { 1091 ir_dereference_variable *const deref = 1092 (ir_dereference_variable *) op; 1093 1094 assert((var == NULL) || (deref->var == var)); 1095 components[i] = SWIZZLE_X; 1096 var = deref->var; 1097 op = NULL; 1098 break; 1099 } 1100 1101 case ir_type_expression: { 1102 ir_expression *const expr = (ir_expression *) op; 1103 1104 assert(expr->operation == ir_unop_neg); 1105 negate[i] = true; 1106 1107 op = expr->operands[0]; 1108 break; 1109 } 1110 1111 case ir_type_swizzle: { 1112 ir_swizzle *const swiz = (ir_swizzle *) op; 1113 1114 components[i] = swiz->mask.x; 1115 op = swiz->val; 1116 break; 1117 } 1118 1119 default: 1120 assert(!"Should not get here."); 1121 return; 1122 } 1123 } 1124 } 1125 1126 assert(var != NULL); 1127 1128 ir_dereference_variable *const deref = 1129 new(mem_ctx) ir_dereference_variable(var); 1130 1131 this->result.file = PROGRAM_UNDEFINED; 1132 deref->accept(this); 1133 if (this->result.file == PROGRAM_UNDEFINED) { 1134 ir_print_visitor v; 1135 printf("Failed to get tree for expression operand:\n"); 1136 deref->accept(&v); 1137 exit(1); 1138 } 1139 1140 st_src_reg src; 1141 1142 src = this->result; 1143 src.swizzle = 
MAKE_SWIZZLE4(components[0], 1144 components[1], 1145 components[2], 1146 components[3]); 1147 src.negate = ((unsigned(negate[0]) << 0) 1148 | (unsigned(negate[1]) << 1) 1149 | (unsigned(negate[2]) << 2) 1150 | (unsigned(negate[3]) << 3)); 1151 1152 /* Storage for our result. Ideally for an assignment we'd be using the 1153 * actual storage for the result here, instead. 1154 */ 1155 const st_src_reg result_src = get_temp(ir->type); 1156 st_dst_reg result_dst = st_dst_reg(result_src); 1157 1158 /* Limit writes to the channels that will be used by result_src later. 1159 * This does limit this temp's use as a temporary for multi-instruction 1160 * sequences. 1161 */ 1162 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1163 1164 emit(ir, OPCODE_SWZ, result_dst, src); 1165 this->result = result_src; 1166} 1167 1168void 1169glsl_to_tgsi_visitor::visit(ir_expression *ir) 1170{ 1171 unsigned int operand; 1172 st_src_reg op[Elements(ir->operands)]; 1173 st_src_reg result_src; 1174 st_dst_reg result_dst; 1175 1176 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) 1177 */ 1178 if (ir->operation == ir_binop_add) { 1179 if (try_emit_mad(ir, 1)) 1180 return; 1181 if (try_emit_mad(ir, 0)) 1182 return; 1183 } 1184 if (try_emit_sat(ir)) 1185 return; 1186 1187 if (ir->operation == ir_quadop_vector) { 1188 this->emit_swz(ir); 1189 return; 1190 } 1191 1192 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1193 this->result.file = PROGRAM_UNDEFINED; 1194 ir->operands[operand]->accept(this); 1195 if (this->result.file == PROGRAM_UNDEFINED) { 1196 ir_print_visitor v; 1197 printf("Failed to get tree for expression operand:\n"); 1198 ir->operands[operand]->accept(&v); 1199 exit(1); 1200 } 1201 op[operand] = this->result; 1202 1203 /* Matrix expression operands should have been broken down to vector 1204 * operations already. 
1205 */ 1206 assert(!ir->operands[operand]->type->is_matrix()); 1207 } 1208 1209 int vector_elements = ir->operands[0]->type->vector_elements; 1210 if (ir->operands[1]) { 1211 vector_elements = MAX2(vector_elements, 1212 ir->operands[1]->type->vector_elements); 1213 } 1214 1215 this->result.file = PROGRAM_UNDEFINED; 1216 1217 /* Storage for our result. Ideally for an assignment we'd be using 1218 * the actual storage for the result here, instead. 1219 */ 1220 result_src = get_temp(ir->type); 1221 /* convenience for the emit functions below. */ 1222 result_dst = st_dst_reg(result_src); 1223 /* Limit writes to the channels that will be used by result_src later. 1224 * This does limit this temp's use as a temporary for multi-instruction 1225 * sequences. 1226 */ 1227 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1228 1229 switch (ir->operation) { 1230 case ir_unop_logic_not: 1231 emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); 1232 break; 1233 case ir_unop_neg: 1234 op[0].negate = ~op[0].negate; 1235 result_src = op[0]; 1236 break; 1237 case ir_unop_abs: 1238 emit(ir, OPCODE_ABS, result_dst, op[0]); 1239 break; 1240 case ir_unop_sign: 1241 emit(ir, OPCODE_SSG, result_dst, op[0]); 1242 break; 1243 case ir_unop_rcp: 1244 emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); 1245 break; 1246 1247 case ir_unop_exp2: 1248 emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); 1249 break; 1250 case ir_unop_exp: 1251 case ir_unop_log: 1252 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1253 break; 1254 case ir_unop_log2: 1255 emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); 1256 break; 1257 case ir_unop_sin: 1258 emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); 1259 break; 1260 case ir_unop_cos: 1261 emit_scalar(ir, OPCODE_COS, result_dst, op[0]); 1262 break; 1263 case ir_unop_sin_reduced: 1264 emit_scs(ir, OPCODE_SIN, result_dst, op[0]); 1265 break; 1266 case ir_unop_cos_reduced: 1267 emit_scs(ir, OPCODE_COS, result_dst, op[0]); 1268 
break; 1269 1270 case ir_unop_dFdx: 1271 emit(ir, OPCODE_DDX, result_dst, op[0]); 1272 break; 1273 case ir_unop_dFdy: 1274 emit(ir, OPCODE_DDY, result_dst, op[0]); 1275 break; 1276 1277 case ir_unop_noise: { 1278 const enum prog_opcode opcode = 1279 prog_opcode(OPCODE_NOISE1 1280 + (ir->operands[0]->type->vector_elements) - 1); 1281 assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); 1282 1283 emit(ir, opcode, result_dst, op[0]); 1284 break; 1285 } 1286 1287 case ir_binop_add: 1288 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1289 break; 1290 case ir_binop_sub: 1291 emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); 1292 break; 1293 1294 case ir_binop_mul: 1295 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1296 break; 1297 case ir_binop_div: 1298 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1299 case ir_binop_mod: 1300 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1301 break; 1302 1303 case ir_binop_less: 1304 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); 1305 break; 1306 case ir_binop_greater: 1307 emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); 1308 break; 1309 case ir_binop_lequal: 1310 emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); 1311 break; 1312 case ir_binop_gequal: 1313 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); 1314 break; 1315 case ir_binop_equal: 1316 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); 1317 break; 1318 case ir_binop_nequal: 1319 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); 1320 break; 1321 case ir_binop_all_equal: 1322 /* "==" operator producing a scalar boolean. 
*/ 1323 if (ir->operands[0]->type->is_vector() || 1324 ir->operands[1]->type->is_vector()) { 1325 st_src_reg temp = get_temp(glsl_type::vec4_type); 1326 emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1327 emit_dp(ir, result_dst, temp, temp, vector_elements); 1328 emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); 1329 } else { 1330 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); 1331 } 1332 break; 1333 case ir_binop_any_nequal: 1334 /* "!=" operator producing a scalar boolean. */ 1335 if (ir->operands[0]->type->is_vector() || 1336 ir->operands[1]->type->is_vector()) { 1337 st_src_reg temp = get_temp(glsl_type::vec4_type); 1338 emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1339 emit_dp(ir, result_dst, temp, temp, vector_elements); 1340 emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1341 } else { 1342 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); 1343 } 1344 break; 1345 1346 case ir_unop_any: 1347 assert(ir->operands[0]->type->is_vector()); 1348 emit_dp(ir, result_dst, op[0], op[0], 1349 ir->operands[0]->type->vector_elements); 1350 emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1351 break; 1352 1353 case ir_binop_logic_xor: 1354 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); 1355 break; 1356 1357 case ir_binop_logic_or: 1358 /* This could be a saturated add and skip the SNE. */ 1359 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1360 emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1361 break; 1362 1363 case ir_binop_logic_and: 1364 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". 
*/ 1365 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1366 break; 1367 1368 case ir_binop_dot: 1369 assert(ir->operands[0]->type->is_vector()); 1370 assert(ir->operands[0]->type == ir->operands[1]->type); 1371 emit_dp(ir, result_dst, op[0], op[1], 1372 ir->operands[0]->type->vector_elements); 1373 break; 1374 1375 case ir_unop_sqrt: 1376 /* sqrt(x) = x * rsq(x). */ 1377 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1378 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); 1379 /* For incoming channels <= 0, set the result to 0. */ 1380 op[0].negate = ~op[0].negate; 1381 emit(ir, OPCODE_CMP, result_dst, 1382 op[0], result_src, st_src_reg_for_float(0.0)); 1383 break; 1384 case ir_unop_rsq: 1385 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1386 break; 1387 case ir_unop_i2f: 1388 case ir_unop_b2f: 1389 case ir_unop_b2i: 1390 /* Mesa IR lacks types, ints are stored as truncated floats. */ 1391 result_src = op[0]; 1392 break; 1393 case ir_unop_f2i: 1394 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1395 break; 1396 case ir_unop_f2b: 1397 case ir_unop_i2b: 1398 emit(ir, OPCODE_SNE, result_dst, 1399 op[0], st_src_reg_for_float(0.0)); 1400 break; 1401 case ir_unop_trunc: 1402 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1403 break; 1404 case ir_unop_ceil: 1405 op[0].negate = ~op[0].negate; 1406 emit(ir, OPCODE_FLR, result_dst, op[0]); 1407 result_src.negate = ~result_src.negate; 1408 break; 1409 case ir_unop_floor: 1410 emit(ir, OPCODE_FLR, result_dst, op[0]); 1411 break; 1412 case ir_unop_fract: 1413 emit(ir, OPCODE_FRC, result_dst, op[0]); 1414 break; 1415 1416 case ir_binop_min: 1417 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); 1418 break; 1419 case ir_binop_max: 1420 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1421 break; 1422 case ir_binop_pow: 1423 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); 1424 break; 1425 1426 case ir_unop_bit_not: 1427 case ir_unop_u2f: 1428 case ir_binop_lshift: 1429 case ir_binop_rshift: 1430 case ir_binop_bit_and: 1431 case 
ir_binop_bit_xor: 1432 case ir_binop_bit_or: 1433 case ir_unop_round_even: 1434 assert(!"GLSL 1.30 features unsupported"); 1435 break; 1436 1437 case ir_quadop_vector: 1438 /* This operation should have already been handled. 1439 */ 1440 assert(!"Should not get here."); 1441 break; 1442 } 1443 1444 this->result = result_src; 1445} 1446 1447 1448void 1449glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1450{ 1451 st_src_reg src; 1452 int i; 1453 int swizzle[4]; 1454 1455 /* Note that this is only swizzles in expressions, not those on the left 1456 * hand side of an assignment, which do write masking. See ir_assignment 1457 * for that. 1458 */ 1459 1460 ir->val->accept(this); 1461 src = this->result; 1462 assert(src.file != PROGRAM_UNDEFINED); 1463 1464 for (i = 0; i < 4; i++) { 1465 if (i < ir->type->vector_elements) { 1466 switch (i) { 1467 case 0: 1468 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1469 break; 1470 case 1: 1471 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1472 break; 1473 case 2: 1474 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1475 break; 1476 case 3: 1477 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1478 break; 1479 } 1480 } else { 1481 /* If the type is smaller than a vec4, replicate the last 1482 * channel out. 
1483 */ 1484 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1485 } 1486 } 1487 1488 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1489 1490 this->result = src; 1491} 1492 1493void 1494glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1495{ 1496 variable_storage *entry = find_variable_storage(ir->var); 1497 ir_variable *var = ir->var; 1498 1499 if (!entry) { 1500 switch (var->mode) { 1501 case ir_var_uniform: 1502 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1503 var->location); 1504 this->variables.push_tail(entry); 1505 break; 1506 case ir_var_in: 1507 case ir_var_inout: 1508 /* The linker assigns locations for varyings and attributes, 1509 * including deprecated builtins (like gl_Color), user-assign 1510 * generic attributes (glBindVertexLocation), and 1511 * user-defined varyings. 1512 * 1513 * FINISHME: We would hit this path for function arguments. Fix! 1514 */ 1515 assert(var->location != -1); 1516 entry = new(mem_ctx) variable_storage(var, 1517 PROGRAM_INPUT, 1518 var->location); 1519 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 1520 var->location >= VERT_ATTRIB_GENERIC0) { 1521 _mesa_add_attribute(this->prog->Attributes, 1522 var->name, 1523 _mesa_sizeof_glsl_type(var->type->gl_type), 1524 var->type->gl_type, 1525 var->location - VERT_ATTRIB_GENERIC0); 1526 } 1527 break; 1528 case ir_var_out: 1529 assert(var->location != -1); 1530 entry = new(mem_ctx) variable_storage(var, 1531 PROGRAM_OUTPUT, 1532 var->location); 1533 break; 1534 case ir_var_system_value: 1535 entry = new(mem_ctx) variable_storage(var, 1536 PROGRAM_SYSTEM_VALUE, 1537 var->location); 1538 break; 1539 case ir_var_auto: 1540 case ir_var_temporary: 1541 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1542 this->next_temp); 1543 this->variables.push_tail(entry); 1544 1545 next_temp += type_size(var->type); 1546 break; 1547 } 1548 1549 if (!entry) { 1550 printf("Failed to make storage for %s\n", var->name); 1551 
exit(1); 1552 } 1553 } 1554 1555 this->result = st_src_reg(entry->file, entry->index, var->type); 1556} 1557 1558void 1559glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1560{ 1561 ir_constant *index; 1562 st_src_reg src; 1563 int element_size = type_size(ir->type); 1564 1565 index = ir->array_index->constant_expression_value(); 1566 1567 ir->array->accept(this); 1568 src = this->result; 1569 1570 if (index) { 1571 src.index += index->value.i[0] * element_size; 1572 } else { 1573 st_src_reg array_base = this->result; 1574 /* Variable index array dereference. It eats the "vec4" of the 1575 * base of the array and an index that offsets the Mesa register 1576 * index. 1577 */ 1578 ir->array_index->accept(this); 1579 1580 st_src_reg index_reg; 1581 1582 if (element_size == 1) { 1583 index_reg = this->result; 1584 } else { 1585 index_reg = get_temp(glsl_type::float_type); 1586 1587 emit(ir, OPCODE_MUL, st_dst_reg(index_reg), 1588 this->result, st_src_reg_for_float(element_size)); 1589 } 1590 1591 src.reladdr = ralloc(mem_ctx, st_src_reg); 1592 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1593 } 1594 1595 /* If the type is smaller than a vec4, replicate the last channel out. */ 1596 if (ir->type->is_scalar() || ir->type->is_vector()) 1597 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1598 else 1599 src.swizzle = SWIZZLE_NOOP; 1600 1601 this->result = src; 1602} 1603 1604void 1605glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 1606{ 1607 unsigned int i; 1608 const glsl_type *struct_type = ir->record->type; 1609 int offset = 0; 1610 1611 ir->record->accept(this); 1612 1613 for (i = 0; i < struct_type->length; i++) { 1614 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1615 break; 1616 offset += type_size(struct_type->fields.structure[i].type); 1617 } 1618 1619 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 1620 if (ir->type->is_scalar() || ir->type->is_vector()) 1621 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1622 else 1623 this->result.swizzle = SWIZZLE_NOOP; 1624 1625 this->result.index += offset; 1626} 1627 1628/** 1629 * We want to be careful in assignment setup to hit the actual storage 1630 * instead of potentially using a temporary like we might with the 1631 * ir_dereference handler. 1632 */ 1633static st_dst_reg 1634get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 1635{ 1636 /* The LHS must be a dereference. If the LHS is a variable indexed array 1637 * access of a vector, it must be separated into a series conditional moves 1638 * before reaching this point (see ir_vec_index_to_cond_assign). 1639 */ 1640 assert(ir->as_dereference()); 1641 ir_dereference_array *deref_array = ir->as_dereference_array(); 1642 if (deref_array) { 1643 assert(!deref_array->array->type->is_vector()); 1644 } 1645 1646 /* Use the rvalue deref handler for the most part. We'll ignore 1647 * swizzles in it and write swizzles using writemask, though. 1648 */ 1649 ir->accept(v); 1650 return st_dst_reg(v->result); 1651} 1652 1653/** 1654 * Process the condition of a conditional assignment 1655 * 1656 * Examines the condition of a conditional assignment to generate the optimal 1657 * first operand of a \c CMP instruction. If the condition is a relational 1658 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1659 * used as the source for the \c CMP instruction. Otherwise the comparison 1660 * is processed to a boolean result, and the boolean result is used as the 1661 * operand to the CMP instruction. 
1662 */ 1663bool 1664glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 1665{ 1666 ir_rvalue *src_ir = ir; 1667 bool negate = true; 1668 bool switch_order = false; 1669 1670 ir_expression *const expr = ir->as_expression(); 1671 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 1672 bool zero_on_left = false; 1673 1674 if (expr->operands[0]->is_zero()) { 1675 src_ir = expr->operands[1]; 1676 zero_on_left = true; 1677 } else if (expr->operands[1]->is_zero()) { 1678 src_ir = expr->operands[0]; 1679 zero_on_left = false; 1680 } 1681 1682 /* a is - 0 + - 0 + 1683 * (a < 0) T F F ( a < 0) T F F 1684 * (0 < a) F F T (-a < 0) F F T 1685 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 1686 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 1687 * (a > 0) F F T (-a < 0) F F T 1688 * (0 > a) T F F ( a < 0) T F F 1689 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 1690 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 1691 * 1692 * Note that exchanging the order of 0 and 'a' in the comparison simply 1693 * means that the value of 'a' should be negated. 1694 */ 1695 if (src_ir != ir) { 1696 switch (expr->operation) { 1697 case ir_binop_less: 1698 switch_order = false; 1699 negate = zero_on_left; 1700 break; 1701 1702 case ir_binop_greater: 1703 switch_order = false; 1704 negate = !zero_on_left; 1705 break; 1706 1707 case ir_binop_lequal: 1708 switch_order = true; 1709 negate = !zero_on_left; 1710 break; 1711 1712 case ir_binop_gequal: 1713 switch_order = true; 1714 negate = zero_on_left; 1715 break; 1716 1717 default: 1718 /* This isn't the right kind of comparison afterall, so make sure 1719 * the whole condition is visited. 1720 */ 1721 src_ir = ir; 1722 break; 1723 } 1724 } 1725 } 1726 1727 src_ir->accept(this); 1728 1729 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 1730 * condition we produced is 0.0 or 1.0. 
By flipping the sign, we can 1731 * choose which value OPCODE_CMP produces without an extra instruction 1732 * computing the condition. 1733 */ 1734 if (negate) 1735 this->result.negate = ~this->result.negate; 1736 1737 return switch_order; 1738} 1739 1740void 1741glsl_to_tgsi_visitor::visit(ir_assignment *ir) 1742{ 1743 st_dst_reg l; 1744 st_src_reg r; 1745 int i; 1746 1747 ir->rhs->accept(this); 1748 r = this->result; 1749 1750 l = get_assignment_lhs(ir->lhs, this); 1751 1752 /* FINISHME: This should really set to the correct maximal writemask for each 1753 * FINISHME: component written (in the loops below). This case can only 1754 * FINISHME: occur for matrices, arrays, and structures. 1755 */ 1756 if (ir->write_mask == 0) { 1757 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1758 l.writemask = WRITEMASK_XYZW; 1759 } else if (ir->lhs->type->is_scalar()) { 1760 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 1761 * FINISHME: W component of fragment shader output zero, work correctly. 1762 */ 1763 l.writemask = WRITEMASK_XYZW; 1764 } else { 1765 int swizzles[4]; 1766 int first_enabled_chan = 0; 1767 int rhs_chan = 0; 1768 1769 assert(ir->lhs->type->is_vector()); 1770 l.writemask = ir->write_mask; 1771 1772 for (int i = 0; i < 4; i++) { 1773 if (l.writemask & (1 << i)) { 1774 first_enabled_chan = GET_SWZ(r.swizzle, i); 1775 break; 1776 } 1777 } 1778 1779 /* Swizzle a small RHS vector into the channels being written. 1780 * 1781 * glsl ir treats write_mask as dictating how many channels are 1782 * present on the RHS while Mesa IR treats write_mask as just 1783 * showing which channels of the vec4 RHS get written. 
1784 */ 1785 for (int i = 0; i < 4; i++) { 1786 if (l.writemask & (1 << i)) 1787 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1788 else 1789 swizzles[i] = first_enabled_chan; 1790 } 1791 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1792 swizzles[2], swizzles[3]); 1793 } 1794 1795 assert(l.file != PROGRAM_UNDEFINED); 1796 assert(r.file != PROGRAM_UNDEFINED); 1797 1798 if (ir->condition) { 1799 const bool switch_order = this->process_move_condition(ir->condition); 1800 st_src_reg condition = this->result; 1801 1802 for (i = 0; i < type_size(ir->lhs->type); i++) { 1803 if (switch_order) { 1804 emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); 1805 } else { 1806 emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); 1807 } 1808 1809 l.index++; 1810 r.index++; 1811 } 1812 } else { 1813 for (i = 0; i < type_size(ir->lhs->type); i++) { 1814 emit(ir, OPCODE_MOV, l, r); 1815 l.index++; 1816 r.index++; 1817 } 1818 } 1819} 1820 1821 1822void 1823glsl_to_tgsi_visitor::visit(ir_constant *ir) 1824{ 1825 st_src_reg src; 1826 GLfloat stack_vals[4] = { 0 }; 1827 GLfloat *values = stack_vals; 1828 unsigned int i; 1829 1830 /* Unfortunately, 4 floats is all we can get into 1831 * _mesa_add_unnamed_constant. So, make a temp to store an 1832 * aggregate constant and move each constant value into it. If we 1833 * get lucky, copy propagation will eliminate the extra moves. 
1834 */ 1835 1836 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1837 st_src_reg temp_base = get_temp(ir->type); 1838 st_dst_reg temp = st_dst_reg(temp_base); 1839 1840 foreach_iter(exec_list_iterator, iter, ir->components) { 1841 ir_constant *field_value = (ir_constant *)iter.get(); 1842 int size = type_size(field_value->type); 1843 1844 assert(size > 0); 1845 1846 field_value->accept(this); 1847 src = this->result; 1848 1849 for (i = 0; i < (unsigned int)size; i++) { 1850 emit(ir, OPCODE_MOV, temp, src); 1851 1852 src.index++; 1853 temp.index++; 1854 } 1855 } 1856 this->result = temp_base; 1857 return; 1858 } 1859 1860 if (ir->type->is_array()) { 1861 st_src_reg temp_base = get_temp(ir->type); 1862 st_dst_reg temp = st_dst_reg(temp_base); 1863 int size = type_size(ir->type->fields.array); 1864 1865 assert(size > 0); 1866 1867 for (i = 0; i < ir->type->length; i++) { 1868 ir->array_elements[i]->accept(this); 1869 src = this->result; 1870 for (int j = 0; j < size; j++) { 1871 emit(ir, OPCODE_MOV, temp, src); 1872 1873 src.index++; 1874 temp.index++; 1875 } 1876 } 1877 this->result = temp_base; 1878 return; 1879 } 1880 1881 if (ir->type->is_matrix()) { 1882 st_src_reg mat = get_temp(ir->type); 1883 st_dst_reg mat_column = st_dst_reg(mat); 1884 1885 for (i = 0; i < ir->type->matrix_columns; i++) { 1886 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1887 values = &ir->value.f[i * ir->type->vector_elements]; 1888 1889 src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); 1890 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1891 values, 1892 ir->type->vector_elements, 1893 &src.swizzle); 1894 emit(ir, OPCODE_MOV, mat_column, src); 1895 1896 mat_column.index++; 1897 } 1898 1899 this->result = mat; 1900 return; 1901 } 1902 1903 src.file = PROGRAM_CONSTANT; 1904 switch (ir->type->base_type) { 1905 case GLSL_TYPE_FLOAT: 1906 values = &ir->value.f[0]; 1907 break; 1908 case GLSL_TYPE_UINT: 1909 for (i = 0; i < ir->type->vector_elements; i++) { 1910 values[i] = 
ir->value.u[i]; 1911 } 1912 break; 1913 case GLSL_TYPE_INT: 1914 for (i = 0; i < ir->type->vector_elements; i++) { 1915 values[i] = ir->value.i[i]; 1916 } 1917 break; 1918 case GLSL_TYPE_BOOL: 1919 for (i = 0; i < ir->type->vector_elements; i++) { 1920 values[i] = ir->value.b[i]; 1921 } 1922 break; 1923 default: 1924 assert(!"Non-float/uint/int/bool constant"); 1925 } 1926 1927 this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); 1928 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1929 values, 1930 ir->type->vector_elements, 1931 &this->result.swizzle); 1932} 1933 1934function_entry * 1935glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 1936{ 1937 function_entry *entry; 1938 1939 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 1940 entry = (function_entry *)iter.get(); 1941 1942 if (entry->sig == sig) 1943 return entry; 1944 } 1945 1946 entry = ralloc(mem_ctx, function_entry); 1947 entry->sig = sig; 1948 entry->sig_id = this->next_signature_id++; 1949 entry->bgn_inst = NULL; 1950 1951 /* Allocate storage for all the parameters. 
*/ 1952 foreach_iter(exec_list_iterator, iter, sig->parameters) { 1953 ir_variable *param = (ir_variable *)iter.get(); 1954 variable_storage *storage; 1955 1956 storage = find_variable_storage(param); 1957 assert(!storage); 1958 1959 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 1960 this->next_temp); 1961 this->variables.push_tail(storage); 1962 1963 this->next_temp += type_size(param->type); 1964 } 1965 1966 if (!sig->return_type->is_void()) { 1967 entry->return_reg = get_temp(sig->return_type); 1968 } else { 1969 entry->return_reg = undef_src; 1970 } 1971 1972 this->function_signatures.push_tail(entry); 1973 return entry; 1974} 1975 1976void 1977glsl_to_tgsi_visitor::visit(ir_call *ir) 1978{ 1979 glsl_to_tgsi_instruction *call_inst; 1980 ir_function_signature *sig = ir->get_callee(); 1981 function_entry *entry = get_function_signature(sig); 1982 int i; 1983 1984 /* Process in parameters. */ 1985 exec_list_iterator sig_iter = sig->parameters.iterator(); 1986 foreach_iter(exec_list_iterator, iter, *ir) { 1987 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 1988 ir_variable *param = (ir_variable *)sig_iter.get(); 1989 1990 if (param->mode == ir_var_in || 1991 param->mode == ir_var_inout) { 1992 variable_storage *storage = find_variable_storage(param); 1993 assert(storage); 1994 1995 param_rval->accept(this); 1996 st_src_reg r = this->result; 1997 1998 st_dst_reg l; 1999 l.file = storage->file; 2000 l.index = storage->index; 2001 l.reladdr = NULL; 2002 l.writemask = WRITEMASK_XYZW; 2003 l.cond_mask = COND_TR; 2004 2005 for (i = 0; i < type_size(param->type); i++) { 2006 emit(ir, OPCODE_MOV, l, r); 2007 l.index++; 2008 r.index++; 2009 } 2010 } 2011 2012 sig_iter.next(); 2013 } 2014 assert(!sig_iter.has_next()); 2015 2016 /* Emit call instruction */ 2017 call_inst = emit(ir, OPCODE_CAL); 2018 call_inst->function = entry; 2019 2020 /* Process out parameters. 
*/ 2021 sig_iter = sig->parameters.iterator(); 2022 foreach_iter(exec_list_iterator, iter, *ir) { 2023 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2024 ir_variable *param = (ir_variable *)sig_iter.get(); 2025 2026 if (param->mode == ir_var_out || 2027 param->mode == ir_var_inout) { 2028 variable_storage *storage = find_variable_storage(param); 2029 assert(storage); 2030 2031 st_src_reg r; 2032 r.file = storage->file; 2033 r.index = storage->index; 2034 r.reladdr = NULL; 2035 r.swizzle = SWIZZLE_NOOP; 2036 r.negate = 0; 2037 2038 param_rval->accept(this); 2039 st_dst_reg l = st_dst_reg(this->result); 2040 2041 for (i = 0; i < type_size(param->type); i++) { 2042 emit(ir, OPCODE_MOV, l, r); 2043 l.index++; 2044 r.index++; 2045 } 2046 } 2047 2048 sig_iter.next(); 2049 } 2050 assert(!sig_iter.has_next()); 2051 2052 /* Process return value. */ 2053 this->result = entry->return_reg; 2054} 2055 2056void 2057glsl_to_tgsi_visitor::visit(ir_texture *ir) 2058{ 2059 st_src_reg result_src, coord, lod_info, projector, dx, dy; 2060 st_dst_reg result_dst, coord_dst; 2061 glsl_to_tgsi_instruction *inst = NULL; 2062 prog_opcode opcode = OPCODE_NOP; 2063 2064 ir->coordinate->accept(this); 2065 2066 /* Put our coords in a temp. We'll need to modify them for shadow, 2067 * projection, or LOD, so the only case we'd use it as is is if 2068 * we're doing plain old texturing. Mesa IR optimization should 2069 * handle cleaning up our mess in that case. 2070 */ 2071 coord = get_temp(glsl_type::vec4_type); 2072 coord_dst = st_dst_reg(coord); 2073 emit(ir, OPCODE_MOV, coord_dst, this->result); 2074 2075 if (ir->projector) { 2076 ir->projector->accept(this); 2077 projector = this->result; 2078 } 2079 2080 /* Storage for our result. Ideally for an assignment we'd be using 2081 * the actual storage for the result here, instead. 
2082 */ 2083 result_src = get_temp(glsl_type::vec4_type); 2084 result_dst = st_dst_reg(result_src); 2085 2086 switch (ir->op) { 2087 case ir_tex: 2088 opcode = OPCODE_TEX; 2089 break; 2090 case ir_txb: 2091 opcode = OPCODE_TXB; 2092 ir->lod_info.bias->accept(this); 2093 lod_info = this->result; 2094 break; 2095 case ir_txl: 2096 opcode = OPCODE_TXL; 2097 ir->lod_info.lod->accept(this); 2098 lod_info = this->result; 2099 break; 2100 case ir_txd: 2101 opcode = OPCODE_TXD; 2102 ir->lod_info.grad.dPdx->accept(this); 2103 dx = this->result; 2104 ir->lod_info.grad.dPdy->accept(this); 2105 dy = this->result; 2106 break; 2107 case ir_txf: // TODO: use TGSI_OPCODE_TXF here 2108 assert(!"GLSL 1.30 features unsupported"); 2109 break; 2110 } 2111 2112 if (ir->projector) { 2113 if (opcode == OPCODE_TEX) { 2114 /* Slot the projector in as the last component of the coord. */ 2115 coord_dst.writemask = WRITEMASK_W; 2116 emit(ir, OPCODE_MOV, coord_dst, projector); 2117 coord_dst.writemask = WRITEMASK_XYZW; 2118 opcode = OPCODE_TXP; 2119 } else { 2120 st_src_reg coord_w = coord; 2121 coord_w.swizzle = SWIZZLE_WWWW; 2122 2123 /* For the other TEX opcodes there's no projective version 2124 * since the last slot is taken up by lod info. Do the 2125 * projective divide now. 2126 */ 2127 coord_dst.writemask = WRITEMASK_W; 2128 emit(ir, OPCODE_RCP, coord_dst, projector); 2129 2130 /* In the case where we have to project the coordinates "by hand," 2131 * the shadow comparitor value must also be projected. 2132 */ 2133 st_src_reg tmp_src = coord; 2134 if (ir->shadow_comparitor) { 2135 /* Slot the shadow value in as the second to last component of the 2136 * coord. 
2137 */ 2138 ir->shadow_comparitor->accept(this); 2139 2140 tmp_src = get_temp(glsl_type::vec4_type); 2141 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2142 2143 tmp_dst.writemask = WRITEMASK_Z; 2144 emit(ir, OPCODE_MOV, tmp_dst, this->result); 2145 2146 tmp_dst.writemask = WRITEMASK_XY; 2147 emit(ir, OPCODE_MOV, tmp_dst, coord); 2148 } 2149 2150 coord_dst.writemask = WRITEMASK_XYZ; 2151 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); 2152 2153 coord_dst.writemask = WRITEMASK_XYZW; 2154 coord.swizzle = SWIZZLE_XYZW; 2155 } 2156 } 2157 2158 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow 2159 * comparitor was put in the correct place (and projected) by the code, 2160 * above, that handles by-hand projection. 2161 */ 2162 if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { 2163 /* Slot the shadow value in as the second to last component of the 2164 * coord. 2165 */ 2166 ir->shadow_comparitor->accept(this); 2167 coord_dst.writemask = WRITEMASK_Z; 2168 emit(ir, OPCODE_MOV, coord_dst, this->result); 2169 coord_dst.writemask = WRITEMASK_XYZW; 2170 } 2171 2172 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { 2173 /* Mesa IR stores lod or lod bias in the last channel of the coords. */ 2174 coord_dst.writemask = WRITEMASK_W; 2175 emit(ir, OPCODE_MOV, coord_dst, lod_info); 2176 coord_dst.writemask = WRITEMASK_XYZW; 2177 } 2178 2179 if (opcode == OPCODE_TXD) 2180 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2181 else 2182 inst = emit(ir, opcode, result_dst, coord); 2183 2184 if (ir->shadow_comparitor) 2185 inst->tex_shadow = GL_TRUE; 2186 2187 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2188 this->shader_program, 2189 this->prog); 2190 2191 const glsl_type *sampler_type = ir->sampler->type; 2192 2193 switch (sampler_type->sampler_dimensionality) { 2194 case GLSL_SAMPLER_DIM_1D: 2195 inst->tex_target = (sampler_type->sampler_array) 2196 ? 
TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2197 break; 2198 case GLSL_SAMPLER_DIM_2D: 2199 inst->tex_target = (sampler_type->sampler_array) 2200 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2201 break; 2202 case GLSL_SAMPLER_DIM_3D: 2203 inst->tex_target = TEXTURE_3D_INDEX; 2204 break; 2205 case GLSL_SAMPLER_DIM_CUBE: 2206 inst->tex_target = TEXTURE_CUBE_INDEX; 2207 break; 2208 case GLSL_SAMPLER_DIM_RECT: 2209 inst->tex_target = TEXTURE_RECT_INDEX; 2210 break; 2211 case GLSL_SAMPLER_DIM_BUF: 2212 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2213 break; 2214 default: 2215 assert(!"Should not get here."); 2216 } 2217 2218 this->result = result_src; 2219} 2220 2221void 2222glsl_to_tgsi_visitor::visit(ir_return *ir) 2223{ 2224 if (ir->get_value()) { 2225 st_dst_reg l; 2226 int i; 2227 2228 assert(current_function); 2229 2230 ir->get_value()->accept(this); 2231 st_src_reg r = this->result; 2232 2233 l = st_dst_reg(current_function->return_reg); 2234 2235 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2236 emit(ir, OPCODE_MOV, l, r); 2237 l.index++; 2238 r.index++; 2239 } 2240 } 2241 2242 emit(ir, OPCODE_RET); 2243} 2244 2245void 2246glsl_to_tgsi_visitor::visit(ir_discard *ir) 2247{ 2248 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2249 2250 if (ir->condition) { 2251 ir->condition->accept(this); 2252 this->result.negate = ~this->result.negate; 2253 emit(ir, OPCODE_KIL, undef_dst, this->result); 2254 } else { 2255 emit(ir, OPCODE_KIL_NV); 2256 } 2257 2258 fp->UsesKill = GL_TRUE; 2259} 2260 2261void 2262glsl_to_tgsi_visitor::visit(ir_if *ir) 2263{ 2264 glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; 2265 glsl_to_tgsi_instruction *prev_inst; 2266 2267 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2268 2269 ir->condition->accept(this); 2270 assert(this->result.file != PROGRAM_UNDEFINED); 2271 2272 if (this->options->EmitCondCodes) { 2273 cond_inst = 
(glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2274 2275 /* See if we actually generated any instruction for generating 2276 * the condition. If not, then cook up a move to a temp so we 2277 * have something to set cond_update on. 2278 */ 2279 if (cond_inst == prev_inst) { 2280 st_src_reg temp = get_temp(glsl_type::bool_type); 2281 cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); 2282 } 2283 cond_inst->cond_update = GL_TRUE; 2284 2285 if_inst = emit(ir->condition, OPCODE_IF); 2286 if_inst->dst.cond_mask = COND_NE; 2287 } else { 2288 if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); 2289 } 2290 2291 this->instructions.push_tail(if_inst); 2292 2293 visit_exec_list(&ir->then_instructions, this); 2294 2295 if (!ir->else_instructions.is_empty()) { 2296 else_inst = emit(ir->condition, OPCODE_ELSE); 2297 visit_exec_list(&ir->else_instructions, this); 2298 } 2299 2300 if_inst = emit(ir->condition, OPCODE_ENDIF); 2301} 2302 2303glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2304{ 2305 result.file = PROGRAM_UNDEFINED; 2306 next_temp = 1; 2307 next_signature_id = 1; 2308 current_function = NULL; 2309 num_address_regs = 0; 2310 indirect_addr_temps = false; 2311 indirect_addr_consts = false; 2312 mem_ctx = ralloc_context(NULL); 2313} 2314 2315glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2316{ 2317 ralloc_free(mem_ctx); 2318} 2319 2320extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2321{ 2322 delete v; 2323} 2324 2325 2326/** 2327 * Count resources used by the given gpu program (number of texture 2328 * samplers, etc). 
2329 */ 2330static void 2331count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2332{ 2333 v->samplers_used = 0; 2334 2335 foreach_iter(exec_list_iterator, iter, v->instructions) { 2336 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2337 2338 if (_mesa_is_tex_instruction(inst->op)) { 2339 v->samplers_used |= 1 << inst->sampler; 2340 2341 prog->SamplerTargets[inst->sampler] = 2342 (gl_texture_index)inst->tex_target; 2343 if (inst->tex_shadow) { 2344 prog->ShadowSamplers |= 1 << inst->sampler; 2345 } 2346 } 2347 } 2348 2349 prog->SamplersUsed = v->samplers_used; 2350 _mesa_update_shader_textures_used(prog); 2351} 2352 2353 2354/** 2355 * Check if the given vertex/fragment/shader program is within the 2356 * resource limits of the context (number of texture units, etc). 2357 * If any of those checks fail, record a linker error. 2358 * 2359 * XXX more checks are needed... 2360 */ 2361static void 2362check_resources(const struct gl_context *ctx, 2363 struct gl_shader_program *shader_program, 2364 glsl_to_tgsi_visitor *prog, 2365 struct gl_program *proginfo) 2366{ 2367 switch (proginfo->Target) { 2368 case GL_VERTEX_PROGRAM_ARB: 2369 if (_mesa_bitcount(prog->samplers_used) > 2370 ctx->Const.MaxVertexTextureImageUnits) { 2371 fail_link(shader_program, "Too many vertex shader texture samplers"); 2372 } 2373 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2374 fail_link(shader_program, "Too many vertex shader constants"); 2375 } 2376 break; 2377 case MESA_GEOMETRY_PROGRAM: 2378 if (_mesa_bitcount(prog->samplers_used) > 2379 ctx->Const.MaxGeometryTextureImageUnits) { 2380 fail_link(shader_program, "Too many geometry shader texture samplers"); 2381 } 2382 if (proginfo->Parameters->NumParameters > 2383 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { 2384 fail_link(shader_program, "Too many geometry shader constants"); 2385 } 2386 break; 2387 case GL_FRAGMENT_PROGRAM_ARB: 2388 if (_mesa_bitcount(prog->samplers_used) > 2389 
ctx->Const.MaxTextureImageUnits) { 2390 fail_link(shader_program, "Too many fragment shader texture samplers"); 2391 } 2392 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2393 fail_link(shader_program, "Too many fragment shader constants"); 2394 } 2395 break; 2396 default: 2397 _mesa_problem(ctx, "unexpected program type in check_resources()"); 2398 } 2399} 2400 2401 2402 2403struct uniform_sort { 2404 struct gl_uniform *u; 2405 int pos; 2406}; 2407 2408/* The shader_program->Uniforms list is almost sorted in increasing 2409 * uniform->{Frag,Vert}Pos locations, but not quite when there are 2410 * uniforms shared between targets. We need to add parameters in 2411 * increasing order for the targets. 2412 */ 2413static int 2414sort_uniforms(const void *a, const void *b) 2415{ 2416 struct uniform_sort *u1 = (struct uniform_sort *)a; 2417 struct uniform_sort *u2 = (struct uniform_sort *)b; 2418 2419 return u1->pos - u2->pos; 2420} 2421 2422/* Add the uniforms to the parameters. The linker chose locations 2423 * in our parameters lists (which weren't created yet), which the 2424 * uniforms code will use to poke values into our parameters list 2425 * when uniforms are updated. 
2426 */ 2427static void 2428add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, 2429 struct gl_shader *shader, 2430 struct gl_program *prog) 2431{ 2432 unsigned int i; 2433 unsigned int next_sampler = 0, num_uniforms = 0; 2434 struct uniform_sort *sorted_uniforms; 2435 2436 sorted_uniforms = ralloc_array(NULL, struct uniform_sort, 2437 shader_program->Uniforms->NumUniforms); 2438 2439 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { 2440 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; 2441 int parameter_index = -1; 2442 2443 switch (shader->Type) { 2444 case GL_VERTEX_SHADER: 2445 parameter_index = uniform->VertPos; 2446 break; 2447 case GL_FRAGMENT_SHADER: 2448 parameter_index = uniform->FragPos; 2449 break; 2450 case GL_GEOMETRY_SHADER: 2451 parameter_index = uniform->GeomPos; 2452 break; 2453 } 2454 2455 /* Only add uniforms used in our target. */ 2456 if (parameter_index != -1) { 2457 sorted_uniforms[num_uniforms].pos = parameter_index; 2458 sorted_uniforms[num_uniforms].u = uniform; 2459 num_uniforms++; 2460 } 2461 } 2462 2463 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), 2464 sort_uniforms); 2465 2466 for (i = 0; i < num_uniforms; i++) { 2467 struct gl_uniform *uniform = sorted_uniforms[i].u; 2468 int parameter_index = sorted_uniforms[i].pos; 2469 const glsl_type *type = uniform->Type; 2470 unsigned int size; 2471 2472 if (type->is_vector() || 2473 type->is_scalar()) { 2474 size = type->vector_elements; 2475 } else { 2476 size = type_size(type) * 4; 2477 } 2478 2479 gl_register_file file; 2480 if (type->is_sampler() || 2481 (type->is_array() && type->fields.array->is_sampler())) { 2482 file = PROGRAM_SAMPLER; 2483 } else { 2484 file = PROGRAM_UNIFORM; 2485 } 2486 2487 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, 2488 uniform->Name); 2489 2490 if (index < 0) { 2491 index = _mesa_add_parameter(prog->Parameters, file, 2492 uniform->Name, size, type->gl_type, 2493 
NULL, NULL, 0x0); 2494 2495 /* Sampler uniform values are stored in prog->SamplerUnits, 2496 * and the entry in that array is selected by this index we 2497 * store in ParameterValues[]. 2498 */ 2499 if (file == PROGRAM_SAMPLER) { 2500 for (unsigned int j = 0; j < size / 4; j++) 2501 prog->Parameters->ParameterValues[index + j][0] = next_sampler++; 2502 } 2503 2504 /* The location chosen in the Parameters list here (returned 2505 * from _mesa_add_uniform) has to match what the linker chose. 2506 */ 2507 if (index != parameter_index) { 2508 fail_link(shader_program, "Allocation of uniform `%s' to target " 2509 "failed (%d vs %d)\n", 2510 uniform->Name, index, parameter_index); 2511 } 2512 } 2513 } 2514 2515 ralloc_free(sorted_uniforms); 2516} 2517 2518static void 2519set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2520 struct gl_shader_program *shader_program, 2521 const char *name, const glsl_type *type, 2522 ir_constant *val) 2523{ 2524 if (type->is_record()) { 2525 ir_constant *field_constant; 2526 2527 field_constant = (ir_constant *)val->components.get_head(); 2528 2529 for (unsigned int i = 0; i < type->length; i++) { 2530 const glsl_type *field_type = type->fields.structure[i].type; 2531 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2532 type->fields.structure[i].name); 2533 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2534 field_type, field_constant); 2535 field_constant = (ir_constant *)field_constant->next; 2536 } 2537 return; 2538 } 2539 2540 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2541 2542 if (loc == -1) { 2543 fail_link(shader_program, 2544 "Couldn't find uniform for initializer %s\n", name); 2545 return; 2546 } 2547 2548 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2549 ir_constant *element; 2550 const glsl_type *element_type; 2551 if (type->is_array()) { 2552 element = val->array_elements[i]; 2553 element_type = type->fields.array; 2554 } else { 2555 element = val; 2556 element_type = type; 2557 } 2558 2559 void *values; 2560 2561 if (element_type->base_type == GLSL_TYPE_BOOL) { 2562 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2563 for (unsigned int j = 0; j < element_type->components(); j++) { 2564 conv[j] = element->value.b[j]; 2565 } 2566 values = (void *)conv; 2567 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2568 element_type->vector_elements, 2569 1); 2570 } else { 2571 values = &element->value; 2572 } 2573 2574 if (element_type->is_matrix()) { 2575 _mesa_uniform_matrix(ctx, shader_program, 2576 element_type->matrix_columns, 2577 element_type->vector_elements, 2578 loc, 1, GL_FALSE, (GLfloat *)values); 2579 loc += element_type->matrix_columns; 2580 } else { 2581 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2582 values, element_type->gl_type); 2583 loc += type_size(element_type); 2584 } 2585 } 2586} 2587 2588static void 2589set_uniform_initializers(struct gl_context *ctx, 2590 struct gl_shader_program *shader_program) 2591{ 2592 void *mem_ctx = NULL; 2593 2594 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { 2595 struct gl_shader *shader = shader_program->_LinkedShaders[i]; 2596 2597 if (shader == NULL) 2598 continue; 2599 2600 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2601 ir_instruction *ir = (ir_instruction *)iter.get(); 2602 ir_variable *var = ir->as_variable(); 2603 2604 if (!var || var->mode != ir_var_uniform || !var->constant_value) 2605 continue; 2606 2607 if (!mem_ctx) 2608 mem_ctx = ralloc_context(NULL); 2609 2610 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, 2611 var->type, var->constant_value); 2612 } 2613 } 2614 2615 ralloc_free(mem_ctx); 2616} 2617 2618/* 2619 * Scan/rewrite program to 
remove reads of custom (output) registers. 2620 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 2621 * (for vertex shaders). 2622 * In GLSL shaders, varying vars can be read and written. 2623 * On some hardware, trying to read an output register causes trouble. 2624 * So, rewrite the program to use a temporary register in this case. 2625 * 2626 * Based on _mesa_remove_output_reads from programopt.c. 2627 */ 2628void 2629glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 2630{ 2631 GLuint i; 2632 GLint outputMap[VERT_RESULT_MAX]; 2633 GLuint numVaryingReads = 0; 2634 GLboolean usedTemps[MAX_PROGRAM_TEMPS]; 2635 GLuint firstTemp = 0; 2636 2637 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 2638 usedTemps, MAX_PROGRAM_TEMPS); 2639 2640 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 2641 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 2642 2643 for (i = 0; i < VERT_RESULT_MAX; i++) 2644 outputMap[i] = -1; 2645 2646 /* look for instructions which read from varying vars */ 2647 foreach_iter(exec_list_iterator, iter, this->instructions) { 2648 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2649 const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); 2650 GLuint j; 2651 for (j = 0; j < numSrc; j++) { 2652 if (inst->src[j].file == type) { 2653 /* replace the read with a temp reg */ 2654 const GLuint var = inst->src[j].index; 2655 if (outputMap[var] == -1) { 2656 numVaryingReads++; 2657 outputMap[var] = _mesa_find_free_register(usedTemps, 2658 MAX_PROGRAM_TEMPS, 2659 firstTemp); 2660 firstTemp = outputMap[var] + 1; 2661 } 2662 inst->src[j].file = PROGRAM_TEMPORARY; 2663 inst->src[j].index = outputMap[var]; 2664 } 2665 } 2666 } 2667 2668 if (numVaryingReads == 0) 2669 return; /* nothing to be done */ 2670 2671 /* look for instructions which write to the varying vars identified above */ 2672 foreach_iter(exec_list_iterator, iter, this->instructions) { 2673 
glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2674 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 2675 /* change inst to write to the temp reg, instead of the varying */ 2676 inst->dst.file = PROGRAM_TEMPORARY; 2677 inst->dst.index = outputMap[inst->dst.index]; 2678 } 2679 } 2680 2681 /* insert new MOV instructions at the end */ 2682 for (i = 0; i < VERT_RESULT_MAX; i++) { 2683 if (outputMap[i] >= 0) { 2684 /* MOV VAR[i], TEMP[tmp]; */ 2685 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); 2686 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); 2687 dst.index = i; 2688 this->emit(NULL, OPCODE_MOV, dst, src); 2689 } 2690 } 2691} 2692 2693/* Replaces all references to a temporary register index with another index. */ 2694void 2695glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 2696{ 2697 foreach_iter(exec_list_iterator, iter, this->instructions) { 2698 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2699 unsigned j; 2700 2701 for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { 2702 if (inst->src[j].file == PROGRAM_TEMPORARY && 2703 inst->src[j].index == index) { 2704 inst->src[j].index = new_index; 2705 } 2706 } 2707 2708 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 2709 inst->dst.index = new_index; 2710 } 2711 } 2712} 2713 2714int 2715glsl_to_tgsi_visitor::get_first_temp_read(int index) 2716{ 2717 int depth = 0; /* loop depth */ 2718 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 2719 unsigned i = 0, j; 2720 2721 foreach_iter(exec_list_iterator, iter, this->instructions) { 2722 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2723 2724 for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { 2725 if (inst->src[j].file == PROGRAM_TEMPORARY && 2726 inst->src[j].index == index) { 2727 return (depth == 0) ? 
i : loop_start; 2728 } 2729 } 2730 2731 if (inst->op == OPCODE_BGNLOOP) { 2732 if(depth++ == 0) 2733 loop_start = i; 2734 } else if (inst->op == OPCODE_ENDLOOP) { 2735 if (--depth == 0) 2736 loop_start = -1; 2737 } 2738 assert(depth >= 0); 2739 2740 i++; 2741 } 2742 2743 return -1; 2744} 2745 2746int 2747glsl_to_tgsi_visitor::get_first_temp_write(int index) 2748{ 2749 int depth = 0; /* loop depth */ 2750 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 2751 int i = 0; 2752 2753 foreach_iter(exec_list_iterator, iter, this->instructions) { 2754 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2755 2756 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 2757 return (depth == 0) ? i : loop_start; 2758 } 2759 2760 if (inst->op == OPCODE_BGNLOOP) { 2761 if(depth++ == 0) 2762 loop_start = i; 2763 } else if (inst->op == OPCODE_ENDLOOP) { 2764 if (--depth == 0) 2765 loop_start = -1; 2766 } 2767 assert(depth >= 0); 2768 2769 i++; 2770 } 2771 2772 return -1; 2773} 2774 2775int 2776glsl_to_tgsi_visitor::get_last_temp_read(int index) 2777{ 2778 int depth = 0; /* loop depth */ 2779 int last = -1; /* index of last instruction that reads the temporary */ 2780 unsigned i = 0, j; 2781 2782 foreach_iter(exec_list_iterator, iter, this->instructions) { 2783 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2784 2785 for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { 2786 if (inst->src[j].file == PROGRAM_TEMPORARY && 2787 inst->src[j].index == index) { 2788 last = (depth == 0) ? 
i : -2; 2789 } 2790 } 2791 2792 if (inst->op == OPCODE_BGNLOOP) 2793 depth++; 2794 else if (inst->op == OPCODE_ENDLOOP) 2795 if (--depth == 0 && last == -2) 2796 last = i; 2797 assert(depth >= 0); 2798 2799 i++; 2800 } 2801 2802 assert(last >= -1); 2803 return last; 2804} 2805 2806int 2807glsl_to_tgsi_visitor::get_last_temp_write(int index) 2808{ 2809 int depth = 0; /* loop depth */ 2810 int last = -1; /* index of last instruction that writes to the temporary */ 2811 int i = 0; 2812 2813 foreach_iter(exec_list_iterator, iter, this->instructions) { 2814 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2815 2816 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 2817 last = (depth == 0) ? i : -2; 2818 2819 if (inst->op == OPCODE_BGNLOOP) 2820 depth++; 2821 else if (inst->op == OPCODE_ENDLOOP) 2822 if (--depth == 0 && last == -2) 2823 last = i; 2824 assert(depth >= 0); 2825 2826 i++; 2827 } 2828 2829 assert(last >= -1); 2830 return last; 2831} 2832 2833/* 2834 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 2835 * channels for copy propagation and updates following instructions to 2836 * use the original versions. 2837 * 2838 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 2839 * will occur. As an example, a TXP production before this pass: 2840 * 2841 * 0: MOV TEMP[1], INPUT[4].xyyy; 2842 * 1: MOV TEMP[1].w, INPUT[4].wwww; 2843 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 2844 * 2845 * and after: 2846 * 2847 * 0: MOV TEMP[1], INPUT[4].xyyy; 2848 * 1: MOV TEMP[1].w, INPUT[4].wwww; 2849 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 2850 * 2851 * which allows for dead code elimination on TEMP[1]'s writes. 
2852 */ 2853void 2854glsl_to_tgsi_visitor::copy_propagate(void) 2855{ 2856 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 2857 glsl_to_tgsi_instruction *, 2858 this->next_temp * 4); 2859 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 2860 int level = 0; 2861 2862 foreach_iter(exec_list_iterator, iter, this->instructions) { 2863 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2864 2865 assert(inst->dst.file != PROGRAM_TEMPORARY 2866 || inst->dst.index < this->next_temp); 2867 2868 /* First, do any copy propagation possible into the src regs. */ 2869 for (int r = 0; r < 3; r++) { 2870 glsl_to_tgsi_instruction *first = NULL; 2871 bool good = true; 2872 int acp_base = inst->src[r].index * 4; 2873 2874 if (inst->src[r].file != PROGRAM_TEMPORARY || 2875 inst->src[r].reladdr) 2876 continue; 2877 2878 /* See if we can find entries in the ACP consisting of MOVs 2879 * from the same src register for all the swizzled channels 2880 * of this src register reference. 2881 */ 2882 for (int i = 0; i < 4; i++) { 2883 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2884 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 2885 2886 if (!copy_chan) { 2887 good = false; 2888 break; 2889 } 2890 2891 assert(acp_level[acp_base + src_chan] <= level); 2892 2893 if (!first) { 2894 first = copy_chan; 2895 } else { 2896 if (first->src[0].file != copy_chan->src[0].file || 2897 first->src[0].index != copy_chan->src[0].index) { 2898 good = false; 2899 break; 2900 } 2901 } 2902 } 2903 2904 if (good) { 2905 /* We've now validated that we can copy-propagate to 2906 * replace this src register reference. Do it. 
2907 */ 2908 inst->src[r].file = first->src[0].file; 2909 inst->src[r].index = first->src[0].index; 2910 2911 int swizzle = 0; 2912 for (int i = 0; i < 4; i++) { 2913 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2914 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 2915 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 2916 (3 * i)); 2917 } 2918 inst->src[r].swizzle = swizzle; 2919 } 2920 } 2921 2922 switch (inst->op) { 2923 case OPCODE_BGNLOOP: 2924 case OPCODE_ENDLOOP: 2925 /* End of a basic block, clear the ACP entirely. */ 2926 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2927 break; 2928 2929 case OPCODE_IF: 2930 ++level; 2931 break; 2932 2933 case OPCODE_ENDIF: 2934 case OPCODE_ELSE: 2935 /* Clear all channels written inside the block from the ACP, but 2936 * leaving those that were not touched. 2937 */ 2938 for (int r = 0; r < this->next_temp; r++) { 2939 for (int c = 0; c < 4; c++) { 2940 if (!acp[4 * r + c]) 2941 continue; 2942 2943 if (acp_level[4 * r + c] >= level) 2944 acp[4 * r + c] = NULL; 2945 } 2946 } 2947 if (inst->op == OPCODE_ENDIF) 2948 --level; 2949 break; 2950 2951 default: 2952 /* Continuing the block, clear any written channels from 2953 * the ACP. 2954 */ 2955 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 2956 /* Any temporary might be written, so no copy propagation 2957 * across this instruction. 2958 */ 2959 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2960 } else if (inst->dst.file == PROGRAM_OUTPUT && 2961 inst->dst.reladdr) { 2962 /* Any output might be written, so no copy propagation 2963 * from outputs across this instruction. 
2964 */ 2965 for (int r = 0; r < this->next_temp; r++) { 2966 for (int c = 0; c < 4; c++) { 2967 if (!acp[4 * r + c]) 2968 continue; 2969 2970 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 2971 acp[4 * r + c] = NULL; 2972 } 2973 } 2974 } else if (inst->dst.file == PROGRAM_TEMPORARY || 2975 inst->dst.file == PROGRAM_OUTPUT) { 2976 /* Clear where it's used as dst. */ 2977 if (inst->dst.file == PROGRAM_TEMPORARY) { 2978 for (int c = 0; c < 4; c++) { 2979 if (inst->dst.writemask & (1 << c)) { 2980 acp[4 * inst->dst.index + c] = NULL; 2981 } 2982 } 2983 } 2984 2985 /* Clear where it's used as src. */ 2986 for (int r = 0; r < this->next_temp; r++) { 2987 for (int c = 0; c < 4; c++) { 2988 if (!acp[4 * r + c]) 2989 continue; 2990 2991 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 2992 2993 if (acp[4 * r + c]->src[0].file == inst->dst.file && 2994 acp[4 * r + c]->src[0].index == inst->dst.index && 2995 inst->dst.writemask & (1 << src_chan)) 2996 { 2997 acp[4 * r + c] = NULL; 2998 } 2999 } 3000 } 3001 } 3002 break; 3003 } 3004 3005 /* If this is a copy, add it to the ACP. */ 3006 if (inst->op == OPCODE_MOV && 3007 inst->dst.file == PROGRAM_TEMPORARY && 3008 !inst->dst.reladdr && 3009 !inst->saturate && 3010 !inst->src[0].reladdr && 3011 !inst->src[0].negate) { 3012 for (int i = 0; i < 4; i++) { 3013 if (inst->dst.writemask & (1 << i)) { 3014 acp[4 * inst->dst.index + i] = inst; 3015 acp_level[4 * inst->dst.index + i] = level; 3016 } 3017 } 3018 } 3019 } 3020 3021 ralloc_free(acp_level); 3022 ralloc_free(acp); 3023} 3024 3025/* 3026 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3027 * 3028 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3029 * will occur. 
As an example, a TXP production after copy propagation but 3030 * before this pass: 3031 * 3032 * 0: MOV TEMP[1], INPUT[4].xyyy; 3033 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3034 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3035 * 3036 * and after this pass: 3037 * 3038 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3039 * 3040 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3041 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3042 */ 3043void 3044glsl_to_tgsi_visitor::eliminate_dead_code(void) 3045{ 3046 int i; 3047 3048 for (i=0; i < this->next_temp; i++) { 3049 int last_read = get_last_temp_read(i); 3050 int j = 0; 3051 3052 foreach_iter(exec_list_iterator, iter, this->instructions) { 3053 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3054 3055 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3056 j > last_read) 3057 { 3058 iter.remove(); 3059 delete inst; 3060 } 3061 3062 j++; 3063 } 3064 } 3065} 3066 3067/* Merges temporary registers together where possible to reduce the number of 3068 * registers needed to run a program. 3069 * 3070 * Produces optimal code only after copy propagation and dead code elimination 3071 * have been run. */ 3072void 3073glsl_to_tgsi_visitor::merge_registers(void) 3074{ 3075 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3076 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3077 int i, j; 3078 3079 /* Read the indices of the last read and first write to each temp register 3080 * into an array so that we don't have to traverse the instruction list as 3081 * much. */ 3082 for (i=0; i < this->next_temp; i++) { 3083 last_reads[i] = get_last_temp_read(i); 3084 first_writes[i] = get_first_temp_write(i); 3085 } 3086 3087 /* Start looking for registers with non-overlapping usages that can be 3088 * merged together. */ 3089 for (i=0; i < this->next_temp; i++) { 3090 /* Don't touch unused registers. 
*/ 3091 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3092 3093 for (j=0; j < this->next_temp; j++) { 3094 /* Don't touch unused registers. */ 3095 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3096 3097 /* We can merge the two registers if the first write to j is after or 3098 * in the same instruction as the last read from i. Note that the 3099 * register at index i will always be used earlier or at the same time 3100 * as the register at index j. */ 3101 if (first_writes[i] <= first_writes[j] && 3102 last_reads[i] <= first_writes[j]) 3103 { 3104 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3105 3106 /* Update the first_writes and last_reads arrays with the new 3107 * values for the merged register index, and mark the newly unused 3108 * register index as such. */ 3109 last_reads[i] = last_reads[j]; 3110 first_writes[j] = -1; 3111 last_reads[j] = -1; 3112 } 3113 } 3114 } 3115 3116 ralloc_free(last_reads); 3117 ralloc_free(first_writes); 3118} 3119 3120/* Reassign indices to temporary registers by reusing unused indices created 3121 * by optimization passes. */ 3122void 3123glsl_to_tgsi_visitor::renumber_registers(void) 3124{ 3125 int i = 0; 3126 int new_index = 0; 3127 3128 for (i=0; i < this->next_temp; i++) { 3129 if (get_first_temp_read(i) < 0) continue; 3130 if (i != new_index) 3131 rename_temp_register(i, new_index); 3132 new_index++; 3133 } 3134 3135 this->next_temp = new_index; 3136} 3137 3138/* ------------------------- TGSI conversion stuff -------------------------- */ 3139struct label { 3140 unsigned branch_target; 3141 unsigned token; 3142}; 3143 3144/** 3145 * Intermediate state used during shader translation. 
 */
struct st_translate {
   struct ureg_program *ureg;

   /* Register tables indexed by Mesa register index.  Entries of temps[]
    * are declared on first use by dst_register()/src_register().
    */
   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
   struct ureg_src *constants;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[1];   /* address[0] is used for indirect addressing */
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   /* Extra info for handling point size clamping in vertex shader */
   struct ureg_dst pointSizeResult; /**< Actual point size output register */
   struct ureg_src pointSizeConst;  /**< Point size range constant register */
   GLint pointSizeOutIndex;         /**< Temp point size output register */
   /* Set by dst_register() when a vertex shader writes VERT_RESULT_PSIZ. */
   GLboolean prevInstWrotePointSize;

   /* Translate a Mesa input/output index into a slot of inputs[]/outputs[]. */
   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;    /* allocated entries */
   unsigned labels_count;   /* entries in use */

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;      /* allocated entries */
   unsigned insn_count;     /* entries in use */

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */

   /* Set to TRUE by get_label()/set_insn_start() when growing their
    * arrays fails.
    */
   boolean error;
};

/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
/* NOTE(review): presumably the entries must stay in the same order as the
 * SYSTEM_VALUE_x enum -- confirm against its definition.
 */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_INSTANCEID
};

/**
 * Make note of a branch to a label in the TGSI code.
 * After we've emitted all instructions, we'll go over the list
 * of labels built here and patch the TGSI code with the actual
 * location of each label.
3199 */ 3200static unsigned *get_label( struct st_translate *t, 3201 unsigned branch_target ) 3202{ 3203 unsigned i; 3204 3205 if (t->labels_count + 1 >= t->labels_size) { 3206 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3207 t->labels = (struct label *)realloc(t->labels, 3208 t->labels_size * sizeof t->labels[0]); 3209 if (t->labels == NULL) { 3210 static unsigned dummy; 3211 t->error = TRUE; 3212 return &dummy; 3213 } 3214 } 3215 3216 i = t->labels_count++; 3217 t->labels[i].branch_target = branch_target; 3218 return &t->labels[i].token; 3219} 3220 3221/** 3222 * Called prior to emitting the TGSI code for each Mesa instruction. 3223 * Allocate additional space for instructions if needed. 3224 * Update the insn[] array so the next Mesa instruction points to 3225 * the next TGSI instruction. 3226 */ 3227static void set_insn_start( struct st_translate *t, 3228 unsigned start ) 3229{ 3230 if (t->insn_count + 1 >= t->insn_size) { 3231 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3232 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); 3233 if (t->insn == NULL) { 3234 t->error = TRUE; 3235 return; 3236 } 3237 } 3238 3239 t->insn[t->insn_count++] = start; 3240} 3241 3242/** 3243 * Map a Mesa dst register to a TGSI ureg_dst register. 
3244 */ 3245static struct ureg_dst 3246dst_register( struct st_translate *t, 3247 gl_register_file file, 3248 GLuint index ) 3249{ 3250 switch( file ) { 3251 case PROGRAM_UNDEFINED: 3252 return ureg_dst_undef(); 3253 3254 case PROGRAM_TEMPORARY: 3255 if (ureg_dst_is_undef(t->temps[index])) 3256 t->temps[index] = ureg_DECL_temporary( t->ureg ); 3257 3258 return t->temps[index]; 3259 3260 case PROGRAM_OUTPUT: 3261 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 3262 t->prevInstWrotePointSize = GL_TRUE; 3263 3264 if (t->procType == TGSI_PROCESSOR_VERTEX) 3265 assert(index < VERT_RESULT_MAX); 3266 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 3267 assert(index < FRAG_RESULT_MAX); 3268 else 3269 assert(index < GEOM_RESULT_MAX); 3270 3271 assert(t->outputMapping[index] < Elements(t->outputs)); 3272 3273 return t->outputs[t->outputMapping[index]]; 3274 3275 case PROGRAM_ADDRESS: 3276 return t->address[index]; 3277 3278 default: 3279 debug_assert( 0 ); 3280 return ureg_dst_undef(); 3281 } 3282} 3283 3284/** 3285 * Map a Mesa src register to a TGSI ureg_src register. 
3286 */ 3287static struct ureg_src 3288src_register( struct st_translate *t, 3289 gl_register_file file, 3290 GLuint index ) 3291{ 3292 switch( file ) { 3293 case PROGRAM_UNDEFINED: 3294 return ureg_src_undef(); 3295 3296 case PROGRAM_TEMPORARY: 3297 assert(index >= 0); 3298 assert(index < Elements(t->temps)); 3299 if (ureg_dst_is_undef(t->temps[index])) 3300 t->temps[index] = ureg_DECL_temporary( t->ureg ); 3301 return ureg_src(t->temps[index]); 3302 3303 case PROGRAM_NAMED_PARAM: 3304 case PROGRAM_ENV_PARAM: 3305 case PROGRAM_LOCAL_PARAM: 3306 case PROGRAM_UNIFORM: 3307 assert(index >= 0); 3308 return t->constants[index]; 3309 case PROGRAM_STATE_VAR: 3310 case PROGRAM_CONSTANT: /* ie, immediate */ 3311 if (index < 0) 3312 return ureg_DECL_constant( t->ureg, 0 ); 3313 else 3314 return t->constants[index]; 3315 3316 case PROGRAM_INPUT: 3317 assert(t->inputMapping[index] < Elements(t->inputs)); 3318 return t->inputs[t->inputMapping[index]]; 3319 3320 case PROGRAM_OUTPUT: 3321 assert(t->outputMapping[index] < Elements(t->outputs)); 3322 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 3323 3324 case PROGRAM_ADDRESS: 3325 return ureg_src(t->address[index]); 3326 3327 case PROGRAM_SYSTEM_VALUE: 3328 assert(index < Elements(t->systemValues)); 3329 return t->systemValues[index]; 3330 3331 default: 3332 debug_assert( 0 ); 3333 return ureg_src_undef(); 3334 } 3335} 3336 3337/** 3338 * Create a TGSI ureg_dst register from a Mesa dest register. 
3339 */ 3340static struct ureg_dst 3341translate_dst( struct st_translate *t, 3342 const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, 3343 boolean saturate ) 3344{ 3345 struct ureg_dst dst = dst_register( t, 3346 dst_reg->file, 3347 dst_reg->index ); 3348 3349 dst = ureg_writemask( dst, 3350 dst_reg->writemask ); 3351 3352 if (saturate) 3353 dst = ureg_saturate( dst ); 3354 3355 if (dst_reg->reladdr != NULL) 3356 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 3357 3358 return dst; 3359} 3360 3361/** 3362 * Create a TGSI ureg_src register from a Mesa src register. 3363 */ 3364static struct ureg_src 3365translate_src( struct st_translate *t, 3366 const st_src_reg *src_reg ) 3367{ 3368 struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); 3369 3370 src = ureg_swizzle( src, 3371 GET_SWZ( src_reg->swizzle, 0 ) & 0x3, 3372 GET_SWZ( src_reg->swizzle, 1 ) & 0x3, 3373 GET_SWZ( src_reg->swizzle, 2 ) & 0x3, 3374 GET_SWZ( src_reg->swizzle, 3 ) & 0x3); 3375 3376 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 3377 src = ureg_negate(src); 3378 3379#if 0 3380 // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR 3381 if (src_reg->abs) 3382 src = ureg_abs(src); 3383#endif 3384 3385 if (src_reg->reladdr != NULL) { 3386 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 3387 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 3388 * set the bit for src.Negate. So we have to do the operation manually 3389 * here to work around the compiler's problems. */ 3390 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 3391 struct ureg_src addr = ureg_src(t->address[0]); 3392 src.Indirect = 1; 3393 src.IndirectFile = addr.File; 3394 src.IndirectIndex = addr.Index; 3395 src.IndirectSwizzle = addr.SwizzleX; 3396 3397 if (src_reg->file != PROGRAM_INPUT && 3398 src_reg->file != PROGRAM_OUTPUT) { 3399 /* If src_reg->index was negative, it was set to zero in 3400 * src_register(). 
Reassign it now. But don't do this 3401 * for input/output regs since they get remapped while 3402 * const buffers don't. 3403 */ 3404 src.Index = src_reg->index; 3405 } 3406 } 3407 3408 return src; 3409} 3410 3411static void 3412compile_tgsi_instruction(struct st_translate *t, 3413 const struct glsl_to_tgsi_instruction *inst) 3414{ 3415 struct ureg_program *ureg = t->ureg; 3416 GLuint i; 3417 struct ureg_dst dst[1]; 3418 struct ureg_src src[4]; 3419 unsigned num_dst; 3420 unsigned num_src; 3421 3422 num_dst = _mesa_num_inst_dst_regs( inst->op ); 3423 num_src = _mesa_num_inst_src_regs( inst->op ); 3424 3425 if (num_dst) 3426 dst[0] = translate_dst( t, 3427 &inst->dst, 3428 inst->saturate); // inst->SaturateMode 3429 3430 for (i = 0; i < num_src; i++) 3431 src[i] = translate_src( t, &inst->src[i] ); 3432 3433 switch( inst->op ) { 3434 case OPCODE_SWZ: 3435 // TODO: copy emit_swz function from st_mesa_to_tgsi.c 3436 //emit_swz( t, dst[0], &inst->src[0] ); 3437 assert(!"OPCODE_SWZ"); 3438 return; 3439 3440 case OPCODE_BGNLOOP: 3441 case OPCODE_CAL: 3442 case OPCODE_ELSE: 3443 case OPCODE_ENDLOOP: 3444 case OPCODE_IF: 3445 debug_assert(num_dst == 0); 3446 ureg_label_insn( ureg, 3447 translate_opcode( inst->op ), 3448 src, num_src, 3449 get_label( t, 3450 inst->op == OPCODE_CAL ? 
inst->function->sig_id : 0 )); 3451 return; 3452 3453 case OPCODE_TEX: 3454 case OPCODE_TXB: 3455 case OPCODE_TXD: 3456 case OPCODE_TXL: 3457 case OPCODE_TXP: 3458 src[num_src++] = t->samplers[inst->sampler]; 3459 ureg_tex_insn( ureg, 3460 translate_opcode( inst->op ), 3461 dst, num_dst, 3462 translate_texture_target( inst->tex_target, 3463 inst->tex_shadow ), 3464 src, num_src ); 3465 return; 3466 3467 case OPCODE_SCS: 3468 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 3469 ureg_insn( ureg, 3470 translate_opcode( inst->op ), 3471 dst, num_dst, 3472 src, num_src ); 3473 break; 3474 3475 case OPCODE_XPD: 3476 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 3477 ureg_insn( ureg, 3478 translate_opcode( inst->op ), 3479 dst, num_dst, 3480 src, num_src ); 3481 break; 3482 3483 case OPCODE_NOISE1: 3484 case OPCODE_NOISE2: 3485 case OPCODE_NOISE3: 3486 case OPCODE_NOISE4: 3487 /* At some point, a motivated person could add a better 3488 * implementation of noise. Currently not even the nvidia 3489 * binary drivers do anything more than this. In any case, the 3490 * place to do this is in the GL state tracker, not the poor 3491 * driver. 3492 */ 3493 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); 3494 break; 3495 3496 case OPCODE_DDY: 3497 // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c 3498 assert(!"OPCODE_DDY"); 3499 //emit_ddy( t, dst[0], &inst->src[0] ); 3500 break; 3501 3502 default: 3503 ureg_insn( ureg, 3504 translate_opcode( inst->op ), 3505 dst, num_dst, 3506 src, num_src ); 3507 break; 3508 } 3509} 3510 3511/** 3512 * Emit the TGSI instructions to adjust the WPOS pixel center convention 3513 * Basically, add (adjX, adjY) to the fragment position. 
3514 */ 3515static void 3516emit_adjusted_wpos( struct st_translate *t, 3517 const struct gl_program *program, 3518 GLfloat adjX, GLfloat adjY) 3519{ 3520 struct ureg_program *ureg = t->ureg; 3521 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 3522 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 3523 3524 /* Note that we bias X and Y and pass Z and W through unchanged. 3525 * The shader might also use gl_FragCoord.w and .z. 3526 */ 3527 ureg_ADD(ureg, wpos_temp, wpos_input, 3528 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); 3529 3530 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 3531} 3532 3533 3534/** 3535 * Emit the TGSI instructions for inverting the WPOS y coordinate. 3536 * This code is unavoidable because it also depends on whether 3537 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 3538 */ 3539static void 3540emit_wpos_inversion( struct st_translate *t, 3541 const struct gl_program *program, 3542 boolean invert) 3543{ 3544 struct ureg_program *ureg = t->ureg; 3545 3546 /* Fragment program uses fragment position input. 3547 * Need to replace instances of INPUT[WPOS] with temp T 3548 * where T = INPUT[WPOS] by y is inverted. 3549 */ 3550 static const gl_state_index wposTransformState[STATE_LENGTH] 3551 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 3552 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 3553 3554 /* XXX: note we are modifying the incoming shader here! 
Need to 3555 * do this before emitting the constant decls below, or this 3556 * will be missed: 3557 */ 3558 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 3559 wposTransformState); 3560 3561 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 3562 struct ureg_dst wpos_temp; 3563 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 3564 3565 /* MOV wpos_temp, input[wpos] 3566 */ 3567 if (wpos_input.File == TGSI_FILE_TEMPORARY) 3568 wpos_temp = ureg_dst(wpos_input); 3569 else { 3570 wpos_temp = ureg_DECL_temporary( ureg ); 3571 ureg_MOV( ureg, wpos_temp, wpos_input ); 3572 } 3573 3574 if (invert) { 3575 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 3576 */ 3577 ureg_MAD( ureg, 3578 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 3579 wpos_input, 3580 ureg_scalar(wpostrans, 0), 3581 ureg_scalar(wpostrans, 1)); 3582 } else { 3583 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 3584 */ 3585 ureg_MAD( ureg, 3586 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 3587 wpos_input, 3588 ureg_scalar(wpostrans, 2), 3589 ureg_scalar(wpostrans, 3)); 3590 } 3591 3592 /* Use wpos_temp as position input from here on: 3593 */ 3594 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 3595} 3596 3597 3598/** 3599 * Emit fragment position/ooordinate code. 
3600 */ 3601static void 3602emit_wpos(struct st_context *st, 3603 struct st_translate *t, 3604 const struct gl_program *program, 3605 struct ureg_program *ureg) 3606{ 3607 const struct gl_fragment_program *fp = 3608 (const struct gl_fragment_program *) program; 3609 struct pipe_screen *pscreen = st->pipe->screen; 3610 boolean invert = FALSE; 3611 3612 if (fp->OriginUpperLeft) { 3613 /* Fragment shader wants origin in upper-left */ 3614 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 3615 /* the driver supports upper-left origin */ 3616 } 3617 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 3618 /* the driver supports lower-left origin, need to invert Y */ 3619 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 3620 invert = TRUE; 3621 } 3622 else 3623 assert(0); 3624 } 3625 else { 3626 /* Fragment shader wants origin in lower-left */ 3627 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 3628 /* the driver supports lower-left origin */ 3629 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 3630 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 3631 /* the driver supports upper-left origin, need to invert Y */ 3632 invert = TRUE; 3633 else 3634 assert(0); 3635 } 3636 3637 if (fp->PixelCenterInteger) { 3638 /* Fragment shader wants pixel center integer */ 3639 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 3640 /* the driver supports pixel center integer */ 3641 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3642 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 3643 /* the driver supports pixel center half integer, need to bias X,Y */ 3644 emit_adjusted_wpos(t, program, 0.5f, invert ? 
0.5f : -0.5f); 3645 else 3646 assert(0); 3647 } 3648 else { 3649 /* Fragment shader wants pixel center half integer */ 3650 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 3651 /* the driver supports pixel center half integer */ 3652 } 3653 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 3654 /* the driver supports pixel center integer, need to bias X,Y */ 3655 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3656 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); 3657 } 3658 else 3659 assert(0); 3660 } 3661 3662 /* we invert after adjustment so that we avoid the MOV to temporary, 3663 * and reuse the adjustment ADD instead */ 3664 emit_wpos_inversion(t, program, invert); 3665} 3666 3667/** 3668 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 3669 * \param program the program to translate 3670 * \param numInputs number of input registers used 3671 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 3672 * input indexes 3673 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 3674 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 3675 * each input 3676 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 3677 * \param numOutputs number of output registers used 3678 * \param outputMapping maps Mesa fragment program outputs to TGSI 3679 * generic outputs 3680 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 3681 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 3682 * each output 3683 * 3684 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 3685 */ 3686extern "C" enum pipe_error 3687st_translate_program( 3688 struct gl_context *ctx, 3689 uint procType, 3690 struct ureg_program *ureg, 3691 glsl_to_tgsi_visitor *program, 3692 const struct gl_program *proginfo, 3693 GLuint numInputs, 3694 const GLuint inputMapping[], 3695 
const ubyte inputSemanticName[], 3696 const ubyte inputSemanticIndex[], 3697 const GLuint interpMode[], 3698 GLuint numOutputs, 3699 const GLuint outputMapping[], 3700 const ubyte outputSemanticName[], 3701 const ubyte outputSemanticIndex[], 3702 boolean passthrough_edgeflags ) 3703{ 3704 struct st_translate translate, *t; 3705 unsigned i; 3706 enum pipe_error ret = PIPE_OK; 3707 3708 assert(numInputs <= Elements(t->inputs)); 3709 assert(numOutputs <= Elements(t->outputs)); 3710 3711 t = &translate; 3712 memset(t, 0, sizeof *t); 3713 3714 t->procType = procType; 3715 t->inputMapping = inputMapping; 3716 t->outputMapping = outputMapping; 3717 t->ureg = ureg; 3718 t->pointSizeOutIndex = -1; 3719 t->prevInstWrotePointSize = GL_FALSE; 3720 3721 /* 3722 * Declare input attributes. 3723 */ 3724 if (procType == TGSI_PROCESSOR_FRAGMENT) { 3725 for (i = 0; i < numInputs; i++) { 3726 t->inputs[i] = ureg_DECL_fs_input(ureg, 3727 inputSemanticName[i], 3728 inputSemanticIndex[i], 3729 interpMode[i]); 3730 } 3731 3732 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 3733 /* Must do this after setting up t->inputs, and before 3734 * emitting constant references, below: 3735 */ 3736 printf("FRAG_BIT_WPOS\n"); 3737 emit_wpos(st_context(ctx), t, proginfo, ureg); 3738 } 3739 3740 if (proginfo->InputsRead & FRAG_BIT_FACE) { 3741 // TODO: uncomment 3742 printf("FRAG_BIT_FACE\n"); 3743 //emit_face_var( t, program ); 3744 } 3745 3746 /* 3747 * Declare output attributes. 
3748 */ 3749 for (i = 0; i < numOutputs; i++) { 3750 switch (outputSemanticName[i]) { 3751 case TGSI_SEMANTIC_POSITION: 3752 t->outputs[i] = ureg_DECL_output( ureg, 3753 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 3754 outputSemanticIndex[i] ); 3755 3756 t->outputs[i] = ureg_writemask( t->outputs[i], 3757 TGSI_WRITEMASK_Z ); 3758 break; 3759 case TGSI_SEMANTIC_STENCIL: 3760 t->outputs[i] = ureg_DECL_output( ureg, 3761 TGSI_SEMANTIC_STENCIL, /* Stencil */ 3762 outputSemanticIndex[i] ); 3763 t->outputs[i] = ureg_writemask( t->outputs[i], 3764 TGSI_WRITEMASK_Y ); 3765 break; 3766 case TGSI_SEMANTIC_COLOR: 3767 t->outputs[i] = ureg_DECL_output( ureg, 3768 TGSI_SEMANTIC_COLOR, 3769 outputSemanticIndex[i] ); 3770 break; 3771 default: 3772 debug_assert(0); 3773 return PIPE_ERROR_BAD_INPUT; 3774 } 3775 } 3776 } 3777 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 3778 for (i = 0; i < numInputs; i++) { 3779 t->inputs[i] = ureg_DECL_gs_input(ureg, 3780 i, 3781 inputSemanticName[i], 3782 inputSemanticIndex[i]); 3783 } 3784 3785 for (i = 0; i < numOutputs; i++) { 3786 t->outputs[i] = ureg_DECL_output( ureg, 3787 outputSemanticName[i], 3788 outputSemanticIndex[i] ); 3789 } 3790 } 3791 else { 3792 assert(procType == TGSI_PROCESSOR_VERTEX); 3793 3794 for (i = 0; i < numInputs; i++) { 3795 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 3796 } 3797 3798 for (i = 0; i < numOutputs; i++) { 3799 t->outputs[i] = ureg_DECL_output( ureg, 3800 outputSemanticName[i], 3801 outputSemanticIndex[i] ); 3802 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 3803 /* Writing to the point size result register requires special 3804 * handling to implement clamping. 3805 */ 3806 static const gl_state_index pointSizeClampState[STATE_LENGTH] 3807 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 3808 /* XXX: note we are modifying the incoming shader here! 
Need to 3809 * do this before emitting the constant decls below, or this 3810 * will be missed. 3811 * XXX: depends on "Parameters" field specific to Mesa IR 3812 */ 3813 unsigned pointSizeClampConst = 3814 _mesa_add_state_reference(proginfo->Parameters, 3815 pointSizeClampState); 3816 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); 3817 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); 3818 t->pointSizeResult = t->outputs[i]; 3819 t->pointSizeOutIndex = i; 3820 t->outputs[i] = psizregtemp; 3821 } 3822 } 3823 /*if (passthrough_edgeflags) 3824 emit_edgeflags( t, program ); */ // TODO: uncomment 3825 } 3826 3827 /* Declare address register. 3828 */ 3829 if (program->num_address_regs > 0) { 3830 debug_assert( program->num_address_regs == 1 ); 3831 t->address[0] = ureg_DECL_address( ureg ); 3832 } 3833 3834 /* Declare misc input registers 3835 */ 3836 { 3837 GLbitfield sysInputs = proginfo->SystemValuesRead; 3838 unsigned numSys = 0; 3839 for (i = 0; sysInputs; i++) { 3840 if (sysInputs & (1 << i)) { 3841 unsigned semName = mesa_sysval_to_semantic[i]; 3842 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 3843 numSys++; 3844 sysInputs &= ~(1 << i); 3845 } 3846 } 3847 } 3848 3849 if (program->indirect_addr_temps) { 3850 /* If temps are accessed with indirect addressing, declare temporaries 3851 * in sequential order. Else, we declare them on demand elsewhere. 3852 * (Note: the number of temporaries is equal to program->next_temp) 3853 */ 3854 for (i = 0; i < (unsigned)program->next_temp; i++) { 3855 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 3856 t->temps[i] = ureg_DECL_temporary( t->ureg ); 3857 } 3858 } 3859 3860 /* Emit constants and immediates. Mesa uses a single index space 3861 * for these, so we put all the translated regs in t->constants. 
3862 * XXX: this entire if block depends on proginfo->Parameters from Mesa IR 3863 */ 3864 if (proginfo->Parameters) { 3865 t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); 3866 if (t->constants == NULL) { 3867 ret = PIPE_ERROR_OUT_OF_MEMORY; 3868 goto out; 3869 } 3870 3871 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 3872 switch (proginfo->Parameters->Parameters[i].Type) { 3873 case PROGRAM_ENV_PARAM: 3874 case PROGRAM_LOCAL_PARAM: 3875 case PROGRAM_STATE_VAR: 3876 case PROGRAM_NAMED_PARAM: 3877 case PROGRAM_UNIFORM: 3878 t->constants[i] = ureg_DECL_constant( ureg, i ); 3879 break; 3880 3881 /* Emit immediates only when there's no indirect addressing of 3882 * the const buffer. 3883 * FIXME: Be smarter and recognize param arrays: 3884 * indirect addressing is only valid within the referenced 3885 * array. 3886 */ 3887 case PROGRAM_CONSTANT: 3888 if (program->indirect_addr_consts) 3889 t->constants[i] = ureg_DECL_constant( ureg, i ); 3890 else 3891 t->constants[i] = 3892 ureg_DECL_immediate( ureg, 3893 proginfo->Parameters->ParameterValues[i], 3894 4 ); 3895 break; 3896 default: 3897 break; 3898 } 3899 } 3900 } 3901 3902 /* texture samplers */ 3903 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 3904 if (program->samplers_used & (1 << i)) { 3905 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 3906 } 3907 } 3908 3909 /* Emit each instruction in turn: 3910 */ 3911 foreach_iter(exec_list_iterator, iter, program->instructions) { 3912 set_insn_start( t, ureg_get_instruction_number( ureg )); 3913 compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); 3914 3915 if (t->prevInstWrotePointSize && proginfo->Id) { 3916 /* The previous instruction wrote to the (fake) vertex point size 3917 * result register. Now we need to clamp that value to the min/max 3918 * point size range, putting the result into the real point size 3919 * register. 
3920 * Note that we can't do this easily at the end of program due to 3921 * possible early return. 3922 */ 3923 set_insn_start( t, ureg_get_instruction_number( ureg )); 3924 ureg_MAX( t->ureg, 3925 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 3926 ureg_src(t->outputs[t->pointSizeOutIndex]), 3927 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 3928 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 3929 ureg_src(t->outputs[t->pointSizeOutIndex]), 3930 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 3931 } 3932 t->prevInstWrotePointSize = GL_FALSE; 3933 } 3934 3935 /* Fix up all emitted labels: 3936 */ 3937 for (i = 0; i < t->labels_count; i++) { 3938 ureg_fixup_label( ureg, 3939 t->labels[i].token, 3940 t->insn[t->labels[i].branch_target] ); 3941 } 3942 3943out: 3944 FREE(t->insn); 3945 FREE(t->labels); 3946 FREE(t->constants); 3947 3948 if (t->error) { 3949 debug_printf("%s: translate error flag set\n", __FUNCTION__); 3950 } 3951 3952 return ret; 3953} 3954/* ----------------------------- End TGSI code ------------------------------ */ 3955 3956/** 3957 * Convert a shader's GLSL IR into a Mesa gl_program, although without 3958 * generating Mesa IR. 
3959 */ 3960static struct gl_program * 3961get_mesa_program(struct gl_context *ctx, 3962 struct gl_shader_program *shader_program, 3963 struct gl_shader *shader) 3964{ 3965 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 3966 struct gl_program *prog; 3967 GLenum target; 3968 const char *target_string; 3969 GLboolean progress; 3970 struct gl_shader_compiler_options *options = 3971 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 3972 3973 switch (shader->Type) { 3974 case GL_VERTEX_SHADER: 3975 target = GL_VERTEX_PROGRAM_ARB; 3976 target_string = "vertex"; 3977 break; 3978 case GL_FRAGMENT_SHADER: 3979 target = GL_FRAGMENT_PROGRAM_ARB; 3980 target_string = "fragment"; 3981 break; 3982 case GL_GEOMETRY_SHADER: 3983 target = GL_GEOMETRY_PROGRAM_NV; 3984 target_string = "geometry"; 3985 break; 3986 default: 3987 assert(!"should not be reached"); 3988 return NULL; 3989 } 3990 3991 validate_ir_tree(shader->ir); 3992 3993 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 3994 if (!prog) 3995 return NULL; 3996 prog->Parameters = _mesa_new_parameter_list(); 3997 prog->Varying = _mesa_new_parameter_list(); 3998 prog->Attributes = _mesa_new_parameter_list(); 3999 v->ctx = ctx; 4000 v->prog = prog; 4001 v->shader_program = shader_program; 4002 v->options = options; 4003 4004 add_uniforms_to_parameters_list(shader_program, shader, prog); 4005 4006 /* Emit Mesa IR for main(). */ 4007 visit_exec_list(shader->ir, v); 4008 v->emit(NULL, OPCODE_END); 4009 4010 /* Now emit bodies for any functions that were used. 
*/ 4011 do { 4012 progress = GL_FALSE; 4013 4014 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4015 function_entry *entry = (function_entry *)iter.get(); 4016 4017 if (!entry->bgn_inst) { 4018 v->current_function = entry; 4019 4020 entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); 4021 entry->bgn_inst->function = entry; 4022 4023 visit_exec_list(&entry->sig->body, v); 4024 4025 glsl_to_tgsi_instruction *last; 4026 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4027 if (last->op != OPCODE_RET) 4028 v->emit(NULL, OPCODE_RET); 4029 4030 glsl_to_tgsi_instruction *end; 4031 end = v->emit(NULL, OPCODE_ENDSUB); 4032 end->function = entry; 4033 4034 progress = GL_TRUE; 4035 } 4036 } 4037 } while (progress); 4038 4039#if 0 4040 /* Print out some information (for debugging purposes) used by the 4041 * optimization passes. */ 4042 for (i=0; i < v->next_temp; i++) { 4043 int fr = v->get_first_temp_read(i); 4044 int fw = v->get_first_temp_write(i); 4045 int lr = v->get_last_temp_read(i); 4046 int lw = v->get_last_temp_write(i); 4047 4048 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4049 assert(fw <= fr); 4050 } 4051#endif 4052 4053 /* Remove reads to output registers, and to varyings in vertex shaders. */ 4054 v->remove_output_reads(PROGRAM_OUTPUT); 4055 if (target == GL_VERTEX_PROGRAM_ARB) 4056 v->remove_output_reads(PROGRAM_VARYING); 4057 4058 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
*/ 4059 v->copy_propagate(); 4060 v->eliminate_dead_code(); 4061 v->merge_registers(); 4062 v->renumber_registers(); 4063 4064 if (ctx->Shader.Flags & GLSL_DUMP) { 4065 printf("\n"); 4066 printf("GLSL IR for linked %s program %d:\n", target_string, 4067 shader_program->Name); 4068 _mesa_print_ir(shader->ir, NULL); 4069 printf("\n"); 4070 printf("\n"); 4071 } 4072 4073 prog->Instructions = NULL; 4074 prog->NumInstructions = 0; 4075 4076 do_set_program_inouts(shader->ir, prog); 4077 count_resources(v, prog); 4078 4079 check_resources(ctx, shader_program, v, prog); 4080 4081 _mesa_reference_program(ctx, &shader->Program, prog); 4082 4083 struct st_vertex_program *stvp; 4084 struct st_fragment_program *stfp; 4085 struct st_geometry_program *stgp; 4086 4087 switch (shader->Type) { 4088 case GL_VERTEX_SHADER: 4089 stvp = (struct st_vertex_program *)prog; 4090 stvp->glsl_to_tgsi = v; 4091 break; 4092 case GL_FRAGMENT_SHADER: 4093 stfp = (struct st_fragment_program *)prog; 4094 stfp->glsl_to_tgsi = v; 4095 break; 4096 case GL_GEOMETRY_SHADER: 4097 stgp = (struct st_geometry_program *)prog; 4098 stgp->glsl_to_tgsi = v; 4099 break; 4100 default: 4101 assert(!"should not be reached"); 4102 return NULL; 4103 } 4104 4105 return prog; 4106} 4107 4108extern "C" { 4109 4110struct gl_shader * 4111st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4112{ 4113 struct gl_shader *shader; 4114 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4115 type == GL_GEOMETRY_SHADER_ARB); 4116 shader = rzalloc(NULL, struct gl_shader); 4117 if (shader) { 4118 shader->Type = type; 4119 shader->Name = name; 4120 _mesa_init_shader(ctx, shader); 4121 } 4122 return shader; 4123} 4124 4125struct gl_shader_program * 4126st_new_shader_program(struct gl_context *ctx, GLuint name) 4127{ 4128 struct gl_shader_program *shProg; 4129 shProg = rzalloc(NULL, struct gl_shader_program); 4130 if (shProg) { 4131 shProg->Name = name; 4132 _mesa_init_shader_program(ctx, shProg); 4133 } 4134 
return shProg; 4135} 4136 4137/** 4138 * Link a shader. 4139 * Called via ctx->Driver.LinkShader() 4140 * This actually involves converting GLSL IR into Mesa gl_programs with 4141 * code lowering and other optimizations. 4142 */ 4143GLboolean 4144st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4145{ 4146 assert(prog->LinkStatus); 4147 4148 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4149 if (prog->_LinkedShaders[i] == NULL) 4150 continue; 4151 4152 bool progress; 4153 exec_list *ir = prog->_LinkedShaders[i]->ir; 4154 const struct gl_shader_compiler_options *options = 4155 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 4156 4157 do { 4158 progress = false; 4159 4160 /* Lowering */ 4161 do_mat_op_to_vec(ir); 4162 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 4163 | LOG_TO_LOG2 4164 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 4165 4166 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 4167 4168 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; 4169 4170 progress = lower_quadop_vector(ir, true) || progress; 4171 4172 if (options->EmitNoIfs) { 4173 progress = lower_discard(ir) || progress; 4174 progress = lower_if_to_cond_assign(ir) || progress; 4175 } 4176 4177 if (options->EmitNoNoise) 4178 progress = lower_noise(ir) || progress; 4179 4180 /* If there are forms of indirect addressing that the driver 4181 * cannot handle, perform the lowering pass. 
4182 */ 4183 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 4184 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 4185 progress = 4186 lower_variable_index_to_cond_assign(ir, 4187 options->EmitNoIndirectInput, 4188 options->EmitNoIndirectOutput, 4189 options->EmitNoIndirectTemp, 4190 options->EmitNoIndirectUniform) 4191 || progress; 4192 4193 progress = do_vec_index_to_cond_assign(ir) || progress; 4194 } while (progress); 4195 4196 validate_ir_tree(ir); 4197 } 4198 4199 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4200 struct gl_program *linked_prog; 4201 4202 if (prog->_LinkedShaders[i] == NULL) 4203 continue; 4204 4205 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 4206 4207 if (linked_prog) { 4208 bool ok = true; 4209 4210 switch (prog->_LinkedShaders[i]->Type) { 4211 case GL_VERTEX_SHADER: 4212 _mesa_reference_vertprog(ctx, &prog->VertexProgram, 4213 (struct gl_vertex_program *)linked_prog); 4214 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, 4215 linked_prog); 4216 break; 4217 case GL_FRAGMENT_SHADER: 4218 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, 4219 (struct gl_fragment_program *)linked_prog); 4220 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, 4221 linked_prog); 4222 break; 4223 case GL_GEOMETRY_SHADER: 4224 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, 4225 (struct gl_geometry_program *)linked_prog); 4226 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, 4227 linked_prog); 4228 break; 4229 } 4230 if (!ok) { 4231 return GL_FALSE; 4232 } 4233 } 4234 4235 _mesa_reference_program(ctx, &linked_prog, NULL); 4236 } 4237 4238 return GL_TRUE; 4239} 4240 4241 4242/** 4243 * Link a GLSL shader program. Called via glLinkProgram(). 
4244 */ 4245void 4246st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4247{ 4248 unsigned int i; 4249 4250 _mesa_clear_shader_program_data(ctx, prog); 4251 4252 prog->LinkStatus = GL_TRUE; 4253 4254 for (i = 0; i < prog->NumShaders; i++) { 4255 if (!prog->Shaders[i]->CompileStatus) { 4256 fail_link(prog, "linking with uncompiled shader"); 4257 prog->LinkStatus = GL_FALSE; 4258 } 4259 } 4260 4261 prog->Varying = _mesa_new_parameter_list(); 4262 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); 4263 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); 4264 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); 4265 4266 if (prog->LinkStatus) { 4267 link_shaders(ctx, prog); 4268 } 4269 4270 if (prog->LinkStatus) { 4271 if (!ctx->Driver.LinkShader(ctx, prog)) { 4272 prog->LinkStatus = GL_FALSE; 4273 } 4274 } 4275 4276 set_uniform_initializers(ctx, prog); 4277 4278 if (ctx->Shader.Flags & GLSL_DUMP) { 4279 if (!prog->LinkStatus) { 4280 printf("GLSL shader program %d failed to link\n", prog->Name); 4281 } 4282 4283 if (prog->InfoLog && prog->InfoLog[0] != 0) { 4284 printf("GLSL shader program %d info log:\n", prog->Name); 4285 printf("%s\n", prog->InfoLog); 4286 } 4287 } 4288} 4289 4290} /* extern "C" */ 4291